From 5c5a8166da31a46eeecdfd6a690a6f4c595a7b8e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 6 Aug 2025 15:08:34 +0100 Subject: [PATCH 01/59] rrdp-dcmi: Detect more errors on discovery Power management is an optional feature and not always available, even if the devices are present. Commands may fail with different error codes, which correspond to a reason, it's good to log this to be able to reference the IPMI 2.0 spec. Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml | 79 ++++++++++++++++------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml b/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml index 683e1174b8d..45fbb5287c1 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml @@ -29,13 +29,56 @@ let ipmitool args = (* we connect to the local /dev/ipmi0 if available to read measurements from local BMC *) ipmitool_bin :: args |> String.concat " " -type discovery_error = Devices_missing +type discovery_error = + | Devices_missing + | Power_management_unavailable + | Unknown + | Command of string list * string * int + (** command, reason, IPMI 2.0 spec error code *) let discovery_error_to_string = function | Devices_missing -> "IPMI devices are missing" + | Power_management_unavailable -> + "Power management is not available" + | Command (command, reason, code) -> + Printf.sprintf "Command %s failed because %s (%x)" + (String.concat " " command) + reason code + | Unknown -> + "unknown" + +let result_of_exec_cmd ~default = function + | [], [] -> + default + | [], err :: _ -> + Error err + | oks, _ -> + Ok oks + +let get_dcmi_power_reading () = + let command = ["power"; "reading"] in + let read_out_line line = + (* example line: ' Instantaneous power reading: 34 Watts' *) + try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some + with _ -> None + in + let read_err_line line = + (* example line: ' DCMI request failed because: Invalid command (c1)' *) + try + Scanf.sscanf line " DCMI request failed because: %S@(%x)" + (fun reason code -> Some (Command (command, reason, code)) + ) + with _ -> None + in + Utils.exec_cmd + (module Process.D) + ~cmdstring:(ipmitool ("dcmi" :: command)) + ~read_out_line ~read_err_line + |> result_of_exec_cmd ~default:(Error Unknown) let discover () = + let ( let* ) = Result.bind in let read_out_line line = (* this code runs once on startup, logging all the output here will be useful for debugging *) D.debug "DCMI discover: %s" line ; @@ -54,22 +97,14 @@ let discover () = else None in - Utils.exec_cmd - (module Process.D) - ~cmdstring:(ipmitool ["dcmi"; "discover"]) - ~read_out_line ~read_err_line - -let get_dcmi_power_reading () = - let read_out_line line = - (* example line: ' Instantaneous power reading: 34 Watts' *) - try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some - with _ -> None + let* (_ : unit list) = + Utils.exec_cmd + (module Process.D) + ~cmdstring:(ipmitool ["dcmi"; "discover"]) + ~read_out_line ~read_err_line + |> result_of_exec_cmd ~default:(Error Power_management_unavailable) in - let read_err_line _ = None in - Utils.exec_cmd - (module Process.D) - ~cmdstring:(ipmitool ["dcmi"; "power"; "reading"]) - ~read_out_line ~read_err_line + get_dcmi_power_reading () let gen_dcmi_power_reading value = ( Rrd.Host @@ -81,7 +116,7 @@ let gen_dcmi_power_reading value = let generate_dss () = match get_dcmi_power_reading () with - | watts :: _, _ -> + | Ok (watts :: _) -> 
[gen_dcmi_power_reading watts] | _ -> [] @@ -89,15 +124,11 @@ let generate_dss () = let _ = initialise () ; match discover () with - | () :: _, _ -> + | Ok _ -> D.info "IPMI DCMI power reading is available" ; main_loop ~neg_shift:0.5 ~target:(Reporter.Local 1) ~protocol:Rrd_interface.V2 ~dss_f:generate_dss - | [], errs -> - let reason = - List.nth_opt errs 0 - |> Option.map discovery_error_to_string - |> Option.value ~default:"unknown" - in + | Error reason -> + let reason = discovery_error_to_string reason in D.warn "IPMI DCMI power readings not available, stopping. Reason: %s" reason From e815c5c3bb9e6a7f4163b7a1d24a43e3c2fa13aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 10:39:58 +0000 Subject: [PATCH 02/59] CA-420968: compute the amount of physical cores available on a NUMA node set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Could also compute it by multiplying it with [threads_per_core], but I'm not sure how that'd interact with [smt=false] in Xen. Also to future-proof this I wouldn't want to rely on an entirely symmetrical architecture (although it'd be very rare to have anything other than 2 on x86-64, or to have hyperthreading on in one socket, and off in another). Note that core ids are not unique (there is a core `0` on both socket 0 and socket 1 for example), so only work with number of cores in the topology code. Could've created a CoreSocketSet instead (assuming that no higher grouping than sockets would exist in the future), but for now don't make too many assumptions about topology. No functional change. Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/topology.ml | 21 ++++++++++++++------- ocaml/xenopsd/lib/topology.mli | 28 ++++++++++++++++++++++------ ocaml/xenopsd/test/test_topology.ml | 24 ++++++++++++++++++------ ocaml/xenopsd/xc/domain.ml | 14 ++++++++++++-- 4 files changed, 66 insertions(+), 21 deletions(-) diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index a2cd401a0cc..7234153505c 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -28,19 +28,20 @@ module CPUSet = struct end module NUMAResource = struct - type t = {affinity: CPUSet.t; memfree: int64} + type t = {affinity: CPUSet.t; cores: int; memfree: int64} - let make ~affinity ~memfree = + let make ~affinity ~cores ~memfree = if memfree < 0L then invalid_arg (Printf.sprintf "NUMAResource: memory cannot be negative: %Ld" memfree) ; - {affinity; memfree} + {affinity; cores; memfree} - let empty = {affinity= CPUSet.empty; memfree= 0L} + let empty = {affinity= CPUSet.empty; cores= 0; memfree= 0L} let union a b = make ~affinity:(CPUSet.union a.affinity b.affinity) + ~cores:(a.cores + b.cores) ~memfree:(Int64.add a.memfree b.memfree) let min_memory r1 r2 = {r1 with memfree= min r1.memfree r2.memfree} @@ -50,6 +51,7 @@ module NUMAResource = struct Dump.record [ Dump.field "affinity" (fun t -> t.affinity) CPUSet.pp_dump + ; Dump.field "cores" (fun t -> t.cores) int ; Dump.field "memfree" (fun t -> t.memfree) int64 ] ) @@ -134,6 +136,7 @@ module NUMA = struct distances: int array array ; cpu_to_node: node array ; node_cpus: CPUSet.t array + ; node_cores: int array ; all: CPUSet.t ; node_usage: int array (** Usage across nodes is meant to be balanced when choosing candidates for a VM *) @@ -203,7 +206,7 @@ module NUMA = struct |> seq_sort ~cmp:dist_cmp |> Seq.map (fun ((_, avg), nodes) -> (avg, Seq.map (fun n -> Node n) nodes)) - let make ~distances ~cpu_to_node = + let 
make ~distances ~cpu_to_node ~node_cores = let ( let* ) = Option.bind in let node_cpus = Array.map (fun _ -> CPUSet.empty) distances in @@ -256,6 +259,7 @@ module NUMA = struct distances ; cpu_to_node= Array.map node_of_int cpu_to_node ; node_cpus + ; node_cores ; all ; node_usage= Array.map (fun _ -> 0) distances ; candidates @@ -265,6 +269,8 @@ module NUMA = struct let cpuset_of_node t (Node i) = t.node_cpus.(i) + let coreset_of_node t (Node i) = t.node_cores.(i) + let node_of_cpu t i = t.cpu_to_node.(i) let nodes t = @@ -278,8 +284,8 @@ module NUMA = struct {t with node_cpus; all} let resource t node ~memory = - let affinity = cpuset_of_node t node in - NUMAResource.make ~affinity ~memfree:memory + let affinity = cpuset_of_node t node and cores = coreset_of_node t node in + NUMAResource.make ~affinity ~cores ~memfree:memory let candidates t = t.candidates @@ -316,6 +322,7 @@ module NUMA = struct ; Dump.field "node_cpus" (fun t -> t.node_cpus) (Dump.array CPUSet.pp_dump) + ; Dump.field "node_cores" (fun t -> t.node_cores) (Dump.array int) ] ) end diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index f1bd6f9f569..d9263b58325 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -29,10 +29,21 @@ end module NUMAResource : sig (** A NUMA node providing CPU and memory resources *) - type t = private {affinity: CPUSet.t; memfree: int64} - - val make : affinity:CPUSet.t -> memfree:int64 -> t - (** [make ~affinity ~memfree] constructs a resource requiring affinity to be + type t = private { + affinity: CPUSet.t + (** logical CPUs. This is the smallest unit of scheduling available, + e.g. a hyperthread. + This can be used directly as a soft-, or hard-affinity mask. *) + ; cores: int + (** number of physical cores fully contained in this node, each containing threads_per_core CPUs, + although some of them may be disabled if [smt=false] *) + ; memfree: int64 + (** free (not reserved, not in use) memory available on this NUMA + node or set of NUMA nodes *) + } + + val make : affinity:CPUSet.t -> cores:int -> memfree:int64 -> t + (** [make ~affinity ~cores ~memfree] constructs a resource requiring affinity to be non-empty and memfree to be > 0. A zero request is allowed due to [shrink]. * *) @@ -78,8 +89,12 @@ module NUMA : sig (** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *) type node = private Node of int - val make : distances:int array array -> cpu_to_node:int array -> t option - (** [make distances cpu_to_node] stores the topology. [distances] is a square + val make : + distances:int array array + -> cpu_to_node:int array + -> node_cores:int array + -> t option + (** [make distances cpu_to_node node_cores] stores the topology. [distances] is a square matrix [d] where [d.(i).(j)] is an approximation to how much slower it is to access memory from node [j] when running on node [i]. Distances are normalized to 10, [d.(i).(i)] must equal to 10, and all values must be >= @@ -94,6 +109,7 @@ module NUMA : sig in Xen and then to -1 by the bindings). [cpu_to_nodes.(i)] = NUMA node of CPU [i] + [node_cores.(i)] = number of cores on NUMA node [i] NUMA nodes without any CPUs are accepted (to handle hard affinities). 
diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index d9945ed8018..629d42343b4 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -58,25 +58,35 @@ module Distances = struct (numa, distances) end -let make_numa_common ~cores_per_numa (distances : Distances.t) = +let make_numa_common ~logical_per_physical ~cores_per_numa + (distances : Distances.t) = + (* cores_per_numa refers to logical cores, i.e. cpus *) let numa, distances = distances in let cpu_to_node = - Array.init (cores_per_numa * numa) (fun core -> core / cores_per_numa) + Array.init (cores_per_numa * numa) (fun cpu -> cpu / cores_per_numa) + and node_cores = + (* core here refers to physical *) + Array.init numa (fun _ -> cores_per_numa / logical_per_physical) in Option.map (fun d -> (cores_per_numa * numa, d)) - (NUMA.make ~distances ~cpu_to_node) + (NUMA.make ~distances ~cpu_to_node ~node_cores) let make_numa ~numa ~cores = let cores_per_numa = cores / numa in - match make_numa_common ~cores_per_numa (Distances.example numa) with + match + make_numa_common ~logical_per_physical:2 ~cores_per_numa + (Distances.example numa) + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> d let make_numa_amd ~cores_per_numa = - match make_numa_common ~cores_per_numa Distances.opteron with + match + make_numa_common ~cores_per_numa ~logical_per_physical:2 Distances.opteron + with | None -> Alcotest.fail "Synthetic matrix can't fail to load" | Some d -> @@ -304,7 +314,9 @@ let distances_tests = in let test_of_spec (name, distances, expected) = let test () = - let numa_t = make_numa_common ~cores_per_numa:1 distances in + let numa_t = + make_numa_common ~logical_per_physical:1 ~cores_per_numa:1 distances + in match (expected, numa_t) with | None, None -> () diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4af94d7b96c..3548d51493f 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -908,8 +908,18 @@ let numa_hierarchy = lazy (let xcext = get_handle () in let distances = (numainfo xcext).distances in - let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in - NUMA.make ~distances ~cpu_to_node + let topoinfo = cputopoinfo xcext in + let core t = t.core and node t = t.node in + let cpu_to_node = topoinfo |> Array.map node + and node_cores = + let module IntSet = Set.Make (Int) in + let a = Array.make (Array.length distances) IntSet.empty in + Array.iter + (fun t -> a.(node t) <- IntSet.add (core t) a.(node t)) + topoinfo ; + Array.map IntSet.cardinal a + in + NUMA.make ~distances ~cpu_to_node ~node_cores ) let numa_mutex = Mutex.create () From ff659cfb6664751705cb2d1dd72bea3b63011784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 27 Nov 2025 10:44:47 +0000 Subject: [PATCH 03/59] CA-420968: ensure compatibility between NUMARequest.fits and plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The planner explicitly looks at the NUMARequest fields and checks that they are non-zero. However if more fields get added in the future this leads to an assertion failure, where the planner thinks it has found a solution, but NUMARequest.fits returns false. Ensure consistency: use `fits` in the planner to check that we've reached a solution. If the remaining request doesn't fit into an empty node, then the request is not empty. 
Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/softaffinity.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/lib/softaffinity.ml b/ocaml/xenopsd/lib/softaffinity.ml index 1e7231506da..10fbdbea786 100644 --- a/ocaml/xenopsd/lib/softaffinity.ml +++ b/ocaml/xenopsd/lib/softaffinity.ml @@ -39,7 +39,7 @@ let plan host nodes ~vm = (Fmt.to_to_string NUMAResource.pp_dump allocated) (Fmt.to_to_string NUMARequest.pp_dump remaining) avg ; - if remaining.NUMARequest.memory > 0L || remaining.NUMARequest.vcpus > 0 then + if not (NUMARequest.fits remaining NUMAResource.empty) then (* [vm] doesn't fit on these nodes *) None else From 3671ba0e7e1364aa76304b61be46f7bfbbf7b3a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 15:20:28 +0000 Subject: [PATCH 04/59] CA-420968: track number of physical cores during a NUMA planning request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The requested number of cores is still 0, so no functional change. Signed-off-by: Edwin Török --- ocaml/xenopsd/lib/topology.ml | 20 +++++++++++++++++--- ocaml/xenopsd/lib/topology.mli | 8 ++++---- ocaml/xenopsd/lib/xenops_server.ml | 2 ++ ocaml/xenopsd/test/test_topology.ml | 2 +- ocaml/xenopsd/xc/domain.ml | 11 ++++++----- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index 7234153505c..f4291ec06f3 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -58,23 +58,36 @@ module NUMAResource = struct end module NUMARequest = struct - type t = {memory: int64; vcpus: int} + type t = {memory: int64; vcpus: int; cores: int} - let make ~memory ~vcpus = + let make ~memory ~vcpus ~cores = if Int64.compare memory 0L < 0 then invalid_arg (Printf.sprintf "NUMARequest: memory must be > 0: %Ld" memory) ; if vcpus < 0 then invalid_arg (Printf.sprintf "vcpus cannot be negative: %d" vcpus) ; - {memory; vcpus} + if cores < 0 then + invalid_arg (Printf.sprintf "cores cannot be negative: %d" cores) ; + {memory; vcpus; cores} let fits requested available = + (* this is a hard constraint: a VM cannot boot if it doesn't have + enough memory *) Int64.compare requested.memory available.NUMAResource.memfree <= 0 + (* this is a soft constraint: a VM can still boot if the (soft) affinity + constraint is not met, although if hard affinity is used this is a hard + constraint too *) && CPUSet.(cardinal available.NUMAResource.affinity >= requested.vcpus) + && (* this is an optional constraint: it is desirable to be able to leave + hyperthread siblings idle, when the system is not busy. + However requested.cores can also be 0. 
+ *) + available.NUMAResource.cores >= requested.cores let shrink a b = make ~memory:(max 0L (Int64.sub a.memory b.NUMAResource.memfree)) ~vcpus:(max 0 (a.vcpus - CPUSet.cardinal b.NUMAResource.affinity)) + ~cores:(max 0 (a.cores - b.NUMAResource.cores)) let pp_dump = Fmt.( @@ -82,6 +95,7 @@ module NUMARequest = struct [ Dump.field "memory" (fun t -> t.memory) int64 ; Dump.field "vcpus" (fun t -> t.vcpus) int + ; Dump.field "cores" (fun t -> t.cores) int ] ) end diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index d9263b58325..8211ffa4ec2 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -62,11 +62,11 @@ end module NUMARequest : sig (** A (VM) requesting resources *) - type t = private {memory: int64; vcpus: int} + type t = private {memory: int64; vcpus: int; cores: int} - val make : memory:int64 -> vcpus:int -> t - (**[make ~memory ~vcpus] constructs a request. [memory] and [vcpus] must be - strictly positive. *) + val make : memory:int64 -> vcpus:int -> cores:int -> t + (**[make ~memory ~vcpus ~cores] constructs a request. [memory], [vcpus] and + [cores] must be strictly positive. *) val fits : t -> NUMAResource.t -> bool (** [fits requested available] checks whether the [available] resources can diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 6a06b36ba14..97c01d89c94 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3627,6 +3627,8 @@ let affinity_of_numa_affinity_policy = let open Xenops_interface.Host in function Any | Best_effort -> Soft | Best_effort_hard -> Hard +let cores_of_numa_affinity_policy _policy ~vcpus:_ = 0 + module HOST = struct let stat _ dbg = Debug.with_thread_associated dbg diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index 629d42343b4..f3d40d0f42d 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -216,7 +216,7 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = |> List.fold_left (fun (costs_old, costs_new, plans) i -> D.debug "Planning VM %d" i ; - let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores in + let vm = NUMARequest.make ~memory:mem ~vcpus:vm_cores ~cores:0 in match Softaffinity.plan h nodes ~vm with | None -> Alcotest.fail "No NUMA plan" diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 3548d51493f..a2da2345e4b 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -945,7 +945,7 @@ let set_affinity = function | Xenops_server.Soft -> Xenctrlext.vcpu_setaffinity_soft -let numa_placement domid ~vcpus ~memory affinity = +let numa_placement domid ~vcpus ~cores ~memory affinity = let open Xenctrlext in let open Topology in with_lock numa_mutex (fun () -> @@ -959,7 +959,7 @@ let numa_placement domid ~vcpus ~memory affinity = numa_meminfo ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) in - let vm = NUMARequest.make ~memory ~vcpus in + let vm = NUMARequest.make ~memory ~vcpus ~cores in let nodea = match !numa_resources with | None -> @@ -1096,10 +1096,11 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = D.debug "VM has hard affinity set, skipping NUMA optimization" ; None ) else - let affinity = - Xenops_server.affinity_of_numa_affinity_policy pin + let affinity = Xenops_server.affinity_of_numa_affinity_policy pin + and cores = + Xenops_server.cores_of_numa_affinity_policy pin ~vcpus in - numa_placement domid ~vcpus + numa_placement domid 
~vcpus ~cores ~memory:(Int64.mul memory.xen_max_mib 1048576L) affinity |> Option.map fst From 5d32507881544c05246fbfec7e0a511dd9971f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 16:09:40 +0000 Subject: [PATCH 05/59] CA-420968: introduce an explicit name for the current NUMA policy: Prio_mem_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current NUMA policy prioritizes reducing cross-NUMA node memory traffic by picking the smallest set of NUMA nodes that fit a VM. It doesn't look at how this affects CPU overload within a NUMA node, or whether the local bandwidth of each NUMA node is balanced or not. Give this policy an explicit name, `Prio_mem_only`, and when the "compat" setting in `xenopsd.conf` is used (`numa-placement=true`), then explicitly use this policy instead of Best-effort. Currently Best-effort is still equivalent to this policy, but that'll change in a follow-up commit. Introduce a new xenopsd.conf entry `numa-best-effort-prio-mem-only`, which can be used to explicitly revert best effort to the current policy. (currently this is a no-op, because there is only one best-effort policy). Future policies should also look at CPU overload. No functional change. Signed-off-by: Edwin Török --- ocaml/xapi-idl/xen/xenops_interface.ml | 2 ++ ocaml/xenopsd/lib/xenops_server.ml | 9 +++++++-- ocaml/xenopsd/lib/xenopsd.ml | 9 +++++++++ ocaml/xenopsd/xc/domain.ml | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 8 +++++++- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index a67c51b0131..9ddcd9753a1 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -494,6 +494,8 @@ module Host = struct node, and soft-pins its VCPUs to the node, if possible. Otherwise behaves like Any. 
*) | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) + | Prio_mem_only + (** Prioritizes reducing memory bandwidth, ignores CPU overload *) [@@deriving rpcty] type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 97c01d89c94..e9fab3b4482 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3622,12 +3622,17 @@ let string_of_numa_affinity_policy = "best-effort" | Best_effort_hard -> "best-effort-hard" + | Prio_mem_only -> + "prio-mem-only" let affinity_of_numa_affinity_policy = let open Xenops_interface.Host in - function Any | Best_effort -> Soft | Best_effort_hard -> Hard + function + | Any | Best_effort | Prio_mem_only -> Soft | Best_effort_hard -> Hard -let cores_of_numa_affinity_policy _policy ~vcpus:_ = 0 +let cores_of_numa_affinity_policy policy ~vcpus:_ = + let open Xenops_interface.Host in + match policy with _ -> 0 module HOST = struct let stat _ dbg = diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index d4a08e92be7..c5242073237 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -70,6 +70,8 @@ let pvinpvh_xen_cmdline = ref "pv-shim console=xen" let numa_placement_compat = ref true +let numa_best_effort_prio_mem_only = ref false + (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -263,6 +265,13 @@ let options = , (fun () -> string_of_bool !numa_placement_compat) , "NUMA-aware placement of VMs (deprecated, use XAPI setting)" ) + ; ( "numa-best-effort-prio-mem-only" + , Arg.Bool (fun x -> numa_best_effort_prio_mem_only := x) + , (fun () -> string_of_bool !numa_best_effort_prio_mem_only) + , "Revert to the previous 'best effort' NUMA policy, where we only \ + filtered NUMA nodes based on available memory. 
Only use if there are \ + issues with the new best effort policy" + ) ; ( "pci-quarantine" , Arg.Bool (fun b -> pci_quarantine := b) , (fun () -> string_of_bool !pci_quarantine) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index a2da2345e4b..90df781d8f2 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1090,7 +1090,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = match !Xenops_server.numa_placement with | Any -> None - | (Best_effort | Best_effort_hard) as pin -> + | (Best_effort | Best_effort_hard | Prio_mem_only) as pin -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> if hard_affinity <> [] then ( D.debug "VM has hard affinity set, skipping NUMA optimization" ; diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 5274569ef4e..1ea719479d1 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -5389,7 +5389,13 @@ let init () = ) ; Device.Backend.init () ; Xenops_server.default_numa_affinity_policy := - if !Xenopsd.numa_placement_compat then Best_effort else Any ; + if !Xenopsd.numa_placement_compat then + if !Xenopsd.numa_best_effort_prio_mem_only then + Prio_mem_only + else + Best_effort + else + Any ; info "Default NUMA affinity policy is '%s'" Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; From b0978541760225bdad31b69e034398ae0c750b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 26 Nov 2025 16:14:54 +0000 Subject: [PATCH 06/59] CA-420968: avoid large performance hit on small NUMA nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NUMA optimized placement can have a large performance hit on machines with small NUMA nodes and VMs with a large number of vCPUs. For example a machine that has 2 sockets, which can run at most 32 vCPUs in a single socket (NUMA node), and a VM with 32 vCPUs. Usually Xen would try to spread the load across actual cores, and avoid the hyperthread siblings, e.g. using CPUs 0,2,4,etc. But when NUMA placement is used all the vCPUs must be in the same NUMA node. If that NUMA node doesn't have enough cores, then Xen will have no choice but to use CPUs 0,1,2,3,etc. Hyperthread siblings share resources, and if you try to use both at the same time you get a big performance hit, depending on the workload. Avoid this by "requesting" cores=vcpus for each VM, which will make the placement algorithm choose the next size up in terms of NUMA nodes (i.e. instead of 1 NUMA node, use 2,3 as needed, falling back to using all nodes if needed). The potential gain from reducing memory latency with a NUMA optimized placement (~20% on Intel Memory Latency Checker: Idle latency) is outweighed by the potential loss due to reduced CPU capacity (40%-75% on OpenSSL, POV-Ray, and OpenVINO), so this is the correct trade-off. If the NUMA node is large enough, or if the VMs have a small number of vCPUs then we still try to use a single NUMA node as we did previously. The performance difference can be reproduced and verified easily by running `openssl speed -multi 32 rsa4096` on a 32 vCPU VM on a host that has 2 NUMA nodes, with 32 PCPUs each, and 2 threads per core. This introduces a policy that can control whether we want to filter out NUMA nodes with too few cores. 
Although we want to enable this filter by default, we still want an "escape hatch" to turn it off if we find problems with it. That is why the "compat" setting (numa_placement=true) in xenopsd.conf reverts back to the old policy, which is now named explicitly as Prio_mem_only. There could still be workloads where optimizing for memory bandwidth makes more sense (although that is a property of the NUMA node, not of individual VMs), so although it might be desirable for this to be a VM policy, it cannot, because it affects other VMs too. TODO: when sched-gran=core this should be turned off. That always has the performance hit, so might as well use smaller NUMA nodes if available. For now this isn't exposed yet as a XAPI-level policy, because that requires more changes (to also sort by free cores on a node, and to also sort at the pool level by free cpus on a host). Once we have those changes we can introduce a new policy `prio_core_mem` to sort by free cores first, then by free memory, and requires cores>=vcpus (i.e. cpus>=vcpus*threads_per_cores) when choosing a node. This changes the default to the new setting, which should be equal or an improvement in the general case. An "escape hatch" to revert to the previous behaviour is to set `numa-placement=true` in xenopsd.conf, and the XAPI host-level policy to 'default_policy'. Signed-off-by: Edwin Török --- ocaml/xapi-idl/xen/xenops_interface.ml | 4 +++- ocaml/xenopsd/lib/xenops_server.ml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 9ddcd9753a1..3920736afd5 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -492,7 +492,9 @@ module Host = struct | Best_effort (** Best-effort placement. Assigns the memory of the VM to a single node, and soft-pins its VCPUs to the node, if possible. Otherwise - behaves like Any. *) + behaves like Any. + The node(s) need to have enough cores to run all the vCPUs of the VM + *) | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) | Prio_mem_only (** Prioritizes reducing memory bandwidth, ignores CPU overload *) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e9fab3b4482..9703f4c2a93 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -3630,9 +3630,9 @@ let affinity_of_numa_affinity_policy = function | Any | Best_effort | Prio_mem_only -> Soft | Best_effort_hard -> Hard -let cores_of_numa_affinity_policy policy ~vcpus:_ = +let cores_of_numa_affinity_policy policy ~vcpus = let open Xenops_interface.Host in - match policy with _ -> 0 + match policy with Any | Prio_mem_only -> 0 | _ -> vcpus module HOST = struct let stat _ dbg = From 2dbbbd0e9571c3338dc8d080f0efdd100c08999e Mon Sep 17 00:00:00 2001 From: Guillaume Date: Thu, 27 Nov 2025 11:26:04 +0100 Subject: [PATCH 07/59] [doc] add missing command to xs-trace Signed-off-by: Guillaume --- doc/content/toolstack/features/Tracing/index.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/content/toolstack/features/Tracing/index.md b/doc/content/toolstack/features/Tracing/index.md index c54441bbb68..4c90c570699 100644 --- a/doc/content/toolstack/features/Tracing/index.md +++ b/doc/content/toolstack/features/Tracing/index.md @@ -81,14 +81,17 @@ and also assist newcomers in onboarding to the project. By default, traces are generated locally in the `/var/log/dt` directory. 
You can copy or forward these traces to another location or endpoint using the `xs-trace` tool. For example, if you have -a *Jaeger* server running locally, you can run: +a *Jaeger* server running locally, you can copy a trace to an endpoint by running: ```sh -xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans +xs-trace cp /var/log/dt/ http://127.0.0.1:9411/api/v2/spans ``` You will then be able to visualize the traces in Jaeger. +The `xs-trace` tool also supports trace files in `.ndjson` and compressed `.zst` formats, so +you can copy or forward these files directly as well. + ### Tagging Trace Sessions for Easier Search #### Specific attributes From 2809d72786961624a8dd36eb25a7f76cb1f1a3ca Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Jun 2025 13:27:20 +0100 Subject: [PATCH 08/59] numa_placement: use Seq instead of List MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This partially applies the following commit to reduce the complexity of a Xen-4.20 patch: > xenopsd-xc: do not try keep track of free memory when planning NUMA nodes (CA-411684) > > Free memory is now properly accounted for because the memory pages are claimed > within the NUMA mutex, so there's no need to have double tracking. > > On top of that, this code never increased the free memory, which means that it > always reached a point where it was impossible to allocate a domain into a > single numa node. > Signed-off-by: Pau Ruiz Safont However it doesn't actually drop the free memory accounting code, so: No functional change Signed-off-by: Edwin Török --- ocaml/xenopsd/xc/domain.ml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 4af94d7b96c..83556cd8c1e 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -942,20 +942,19 @@ let numa_placement domid ~vcpus ~memory affinity = let ( let* ) = Option.bind in let xcext = get_handle () in let* host = Lazy.force numa_hierarchy in - let numa_meminfo = (numainfo xcext).memory |> Array.to_list in + let numa_meminfo = (numainfo xcext).memory |> Array.to_seq in let nodes = - ListLabels.map2 - (NUMA.nodes host |> List.of_seq) - numa_meminfo - ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) + Seq.map2 + (fun node m -> NUMA.resource host node ~memory:m.memfree) + (NUMA.nodes host) numa_meminfo in let vm = NUMARequest.make ~memory ~vcpus in let nodea = match !numa_resources with | None -> - Array.of_list nodes + Array.of_seq nodes | Some a -> - Array.map2 NUMAResource.min_memory (Array.of_list nodes) a + Array.map2 NUMAResource.min_memory (Array.of_seq nodes) a in numa_resources := Some nodea ; let memory_plan = From c1b1311d246c06f922cb6f72abfe011820342505 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Wed, 26 Nov 2025 07:36:07 +0000 Subject: [PATCH 09/59] CP-309847: Make HTTP/80 configurable - Introduce https_only argument for Host.create - Set https_only from configuration for installation - Keep https_only from joining host during pool join Signed-off-by: Lin Liu --- ocaml/idl/datamodel_host.ml | 9 +++++++++ ocaml/tests/common/test_common.ml | 10 +++++----- ocaml/tests/test_host.ml | 1 + ocaml/xapi/dbsync_slave.ml | 1 + ocaml/xapi/xapi_globs.ml | 7 +++++++ ocaml/xapi/xapi_host.ml | 4 ++-- ocaml/xapi/xapi_host.mli | 1 + ocaml/xapi/xapi_pool.ml | 2 ++ 8 files changed, 28 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 29b5610b226..f35c6e95103 
100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1398,6 +1398,14 @@ let create_params = ; param_release= numbered_release "25.32.0-next" ; param_default= Some (VMap []) } + ; { + param_type= Bool + ; param_name= "https_only" + ; param_doc= + "updates firewall to open or close port 80 depending on the value" + ; param_release= numbered_release "25.38.0-next" + ; param_default= Some (VBool false) + } ] let create = @@ -1416,6 +1424,7 @@ let create = --console_idle_timeout --ssh_auto_mode options to allow them to be \ configured for new host" ) + ; (Changed, "25.38.0-next", "Added --https_only to disable http") ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 09f6a3b465a..7fc190f43c7 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -175,7 +175,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(last_software_update = Date.epoch) ?(last_update_hash = "") ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) ?(console_idle_timeout = 0L) ?(ssh_auto_mode = false) ?(secure_boot = false) - () = + ?(https_only = false) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name @@ -184,6 +184,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ~software_version:(Xapi_globs.software_version ()) + ~https_only in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host @@ -194,15 +195,14 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(external_auth_type = "") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) - ?(chipset_info = []) ?(ssl_legacy = false) () = + ?(chipset_info = []) ?(ssl_legacy = false) ?(https_only = false) () = let pool = Helpers.get_pool ~__context in let tls_verification_enabled = Db.Pool.get_tls_verification_enabled ~__context ~self:pool in Db.Host.create ~__context ~ref ~current_operations:[] ~allowed_operations:[] ~software_version:(Xapi_globs.software_version ()) - ~https_only:false ~enabled:false - ~aPI_version_major:Datamodel_common.api_version_major + ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor ~aPI_version_vendor_implementation: @@ -224,7 +224,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false - ~secure_boot:false ; + ~secure_boot:false ~https_only ; ref let make_pif ~__context ~network ~host ?(device = "eth0") diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index bb869d292c0..45ca0d3c2ea 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -27,6 +27,7 @@ let add_host __context name = ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false 
~software_version:(Xapi_globs.software_version ()) + ~https_only:false ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 91bea2d25b4..ff325b7259e 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -66,6 +66,7 @@ let create_localhost ~__context info = ~console_idle_timeout:Constants.default_console_idle_timeout ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default ~secure_boot:false ~software_version:[] + ~https_only:!Xapi_globs.https_only in () diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 5d4fe609b52..161273c83f9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1133,6 +1133,8 @@ let xapi_requests_cgroup = let genisoimage_path = ref "/usr/bin/genisoimage" +let https_only = ref false + (* Event.{from,next} batching delays *) let make_batching name ~delay_before ~delay_between = let name = Printf.sprintf "%s_delay" name in @@ -1834,6 +1836,11 @@ let other_options = , (fun () -> string_of_int !max_span_depth) , "The maximum depth to which spans are recorded in a trace in Tracing" ) + ; ( "https-only-default" + , Arg.Set https_only + , (fun () -> string_of_bool !https_only) + , "Only expose HTTPS service, disable HTTP/80 in firewall when set to true" + ) ; ( "firewall-backend" , Arg.String (fun s -> diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index ee446592bb9..b9f105610dd 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1029,7 +1029,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode - ~secure_boot ~software_version = + ~secure_boot ~software_version ~https_only = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1064,7 +1064,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address (* no or multiple pools *) in Db.Host.create ~__context ~ref:host ~current_operations:[] - ~allowed_operations:[] ~https_only:false ~software_version ~enabled:false + ~allowed_operations:[] ~https_only ~software_version ~enabled:false ~aPI_version_major:Datamodel_common.api_version_major ~aPI_version_minor:Datamodel_common.api_version_minor ~aPI_version_vendor:Datamodel_common.api_version_vendor diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 316ee9f6edf..b20f4ef3fe9 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -138,6 +138,7 @@ val create : -> ssh_auto_mode:bool -> secure_boot:bool -> software_version:(string * string) list + -> https_only:bool -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index cbb39e28adb..752d822135f 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1033,6 +1033,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : create_or_get_sr_on_master __context rpc session_id (my_local_cache_sr, my_local_cache_sr_rec) in + debug "Creating host object on master" ; let ref = Client.Host.create ~rpc ~session_id ~uuid:my_uuid @@ -1060,6 +1061,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~ssh_auto_mode:host.API.host_ssh_auto_mode ~secure_boot:host.API.host_secure_boot ~software_version:host.API.host_software_version + ~https_only:host.API.host_https_only in (* Copy other-config into newly created host record: *) no_exn From 4da3c01208a9729ba28cd2a5326ee97e8d530f12 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 24 Feb 2023 14:49:39 +0000 Subject: [PATCH 10/59] CP-31566 define xenopsd fast resume operation Define and implement an operation that uses Xen's fast resume to reume a domain. This operation is currently not used but has been tested. It is accessible from the xenopsd CLI ("xenops-cli") for experiments. 
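For example, a possible invocation (assuming the usual positional VM-name argument taken by the other xenops-cli subcommands; "myvm" is a placeholder name):

    # resumes a suspended domain in place and leaves the VM in the Running state
    xenops-cli fast-resume myvm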
Signed-off-by: Christian Lindig --- ocaml/xapi-idl/xen/xenops_interface.ml | 4 ++++ ocaml/xenopsd/cli/main.ml | 20 ++++++++++++++++++++ ocaml/xenopsd/cli/xn.ml | 9 +++++++++ ocaml/xenopsd/cli/xn.mli | 3 +++ ocaml/xenopsd/lib/xenops_server.ml | 10 ++++++++++ ocaml/xenopsd/lib/xenops_server_plugin.ml | 2 ++ ocaml/xenopsd/lib/xenops_server_skeleton.ml | 2 ++ ocaml/xenopsd/xc/domain.ml | 13 +++++++++++++ ocaml/xenopsd/xc/domain.mli | 10 ++++++++++ ocaml/xenopsd/xc/xenops_server_xen.ml | 20 ++++++++++++++++++++ 10 files changed, 93 insertions(+) diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 3920736afd5..f27b4ec00b8 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -855,6 +855,10 @@ module XenopsAPI (R : RPC) = struct declare "VM.resume" [] (debug_info_p @-> vm_id_p @-> disk_p @-> returning task_id_p err) + let fast_resume = + declare "VM.fast_resume" [] + (debug_info_p @-> vm_id_p @-> returning task_id_p err) + let s3suspend = declare "VM.s3suspend" [] (debug_info_p @-> vm_id_p @-> returning task_id_p err) diff --git a/ocaml/xenopsd/cli/main.ml b/ocaml/xenopsd/cli/main.ml index a8111444880..54842be4b31 100644 --- a/ocaml/xenopsd/cli/main.ml +++ b/ocaml/xenopsd/cli/main.ml @@ -317,6 +317,25 @@ let resume_cmd = , Cmd.info "resume" ~sdocs:_common_options ~doc ~man ) +let fast_resume_cmd = + let vm = vm_arg "resumed" in + let doc = "fast-resume a VM" in + let man = + [ + `S "DESCRIPTION" + ; `P "Fast-resume a VM." + ; `P + {|The suspended domain will be resumed + and the VM will be left in a Running state.|} + ; `S "ERRORS" + ; `P "Something about the current power state." + ] + @ help + in + ( Term.(ret (const Xn.fast_resume $ common_options_t $ vm)) + , Cmd.info "fast-resume" ~sdocs:_common_options ~doc ~man + ) + let pause_cmd = let vm = vm_arg "paused" in let doc = "pause a VM" in @@ -491,6 +510,7 @@ let cmds = ; reboot_cmd ; suspend_cmd ; resume_cmd + ; fast_resume_cmd ; pause_cmd ; unpause_cmd ; import_cmd diff --git a/ocaml/xenopsd/cli/xn.ml b/ocaml/xenopsd/cli/xn.ml index 24fecb9cf09..03c8db2e31c 100644 --- a/ocaml/xenopsd/cli/xn.ml +++ b/ocaml/xenopsd/cli/xn.ml @@ -873,6 +873,15 @@ let suspend _copts disk x = let suspend copts disk x = diagnose_error (need_vm (suspend copts disk) x) +let fast_resume _copts x = + let open Vm in + let vm, _ = find_by_name x in + Client.VM.fast_resume dbg vm.id + |> wait_for_task dbg + |> success_task ignore_task + +let fast_resume copts x = diagnose_error (need_vm (fast_resume copts) x) + let resume _copts disk x = (* We don't currently store where the suspend image is *) let disk = diff --git a/ocaml/xenopsd/cli/xn.mli b/ocaml/xenopsd/cli/xn.mli index 0acd3551e09..615f5c868b2 100644 --- a/ocaml/xenopsd/cli/xn.mli +++ b/ocaml/xenopsd/cli/xn.mli @@ -47,6 +47,9 @@ val resume : -> string option -> [> `Error of bool * string | `Ok of unit] +val fast_resume : + 'a -> string option -> [> `Error of bool * string | `Ok of unit] + val console_connect : 'a -> string option -> [> `Error of bool * string | `Ok of unit] diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 9703f4c2a93..54d528829ff 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -164,6 +164,7 @@ type atomic = (** takes suspend data, plus optionally vGPU state data *) | VM_restore of (Vm.id * data * data option) (** takes suspend data, plus optionally vGPU state data *) + | VM_fast_resume of Vm.id | VM_delay of (Vm.id * 
float) (** used to suppress fast reboot loops *) | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) @@ -279,6 +280,8 @@ let rec name_of_atomic = function "VM_save" | VM_restore _ -> "VM_restore" + | VM_fast_resume _ -> + "VM_fast_resume" | VM_delay _ -> "VM_delay" | VM_rename _ -> @@ -2377,6 +2380,9 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) let extras = [] in B.VM.restore t progress_callback (VM_DB.read_exn id) vbds vifs data vgpu_data extras + | VM_fast_resume id -> + debug "VM.fast_resume %s" id ; + B.VM.resume t (VM_DB.read_exn id) | VM_delay (id, t) -> debug "VM %s: waiting for %.2f before next VM action" id t ; Thread.delay t @@ -2669,6 +2675,7 @@ and trigger_cleanup_after_failure_atom op t = | VM_s3resume id | VM_save (id, _, _, _) | VM_restore (id, _, _) + | VM_fast_resume id | VM_delay (id, _) | VM_softreboot id -> immediate_operation dbg id (VM_check_state id) @@ -3828,6 +3835,8 @@ module VM = struct let resume _ dbg id disk = queue_operation dbg id (VM_resume (id, Disk disk)) + let fast_resume _ dbg id = queue_operation dbg id (Atomic (VM_fast_resume id)) + let s3suspend _ dbg id = queue_operation dbg id (Atomic (VM_s3suspend id)) let s3resume _ dbg id = queue_operation dbg id (Atomic (VM_s3resume id)) @@ -4409,6 +4418,7 @@ let _ = Server.VM.reboot (VM.reboot ()) ; Server.VM.suspend (VM.suspend ()) ; Server.VM.resume (VM.resume ()) ; + Server.VM.fast_resume (VM.fast_resume ()) ; Server.VM.s3suspend (VM.s3suspend ()) ; Server.VM.s3resume (VM.s3resume ()) ; Server.VM.export_metadata (VM.export_metadata ()) ; diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index e4a61bb9ac8..6cee8a58f05 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -159,6 +159,8 @@ module type S = sig -> string list -> unit + val resume : Xenops_task.task_handle -> Vm.t -> unit + val s3suspend : Xenops_task.task_handle -> Vm.t -> unit val s3resume : Xenops_task.task_handle -> Vm.t -> unit diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index 1a42aafafb4..d812910fd27 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -97,6 +97,8 @@ module VM = struct let restore _ _ _ _ _ _ _ = unimplemented __FUNCTION__ + let resume _ _ = unimplemented __FUNCTION__ + let s3suspend _ _ = unimplemented __FUNCTION__ let s3resume _ _ = unimplemented __FUNCTION__ diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 90df781d8f2..cf124131bd2 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1364,6 +1364,19 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn store_port local_stuff vm_stuff +let resume_post ~xc ~xs domid = + let dom_path = xs.Xs.getdomainpath domid in + let store_mfn_s = xs.Xs.read (dom_path ^ "/store/ring-ref") in + let store_mfn = Nativeint.of_string store_mfn_s in + let store_port = int_of_string (xs.Xs.read (dom_path ^ "/store/port")) in + xs.Xs.introduce domid store_mfn store_port + +let resume (task : Xenops_task.task_handle) ~xc ~xs ~qemu_domid ~domain_type + domid = + Xenctrl.domain_resume_fast xc domid ; + resume_post ~xc ~xs domid ; + if domain_type = `hvm then Device.Dm.resume task ~xs ~qemu_domid domid + type suspend_flag = Live | Debug let dm_flags = diff --git 
a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index 40f154561a3..574782fdcec 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -242,6 +242,16 @@ val build : -> unit (** Restore a domain using the info provided *) +val resume : + Xenops_task.Xenops_task.task_handle + -> xc:Xenctrl.handle + -> xs:Ezxenstore_core.Xenstore.Xs.xsh + -> qemu_domid:int + -> domain_type:[`hvm | `pv | `pvh] + -> domid + -> unit +(** Fast resume *) + val restore : Xenops_task.Xenops_task.task_handle -> xc:Xenctrl.handle diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 16031f211de..03a4c0b641f 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3021,6 +3021,26 @@ module VM = struct Domain.shutdown ~xc ~xs di.Xenctrl.domid Domain.S3Suspend ) + let resume t vm = + on_domain t vm (fun xc xs task vm di -> + let domid = di.Xenctrl.domid in + let qemu_domid = this_domid ~xs in + let domain_type = + match get_domain_type ~xs di with + | Vm.Domain_HVM -> + `hvm + | Vm.Domain_PV -> + `pv + | Vm.Domain_PVinPVH -> + `pvh + | Vm.Domain_PVH -> + `pvh + | Vm.Domain_undefined -> + failwith "undefined domain type: cannot resume" + in + Domain.resume task ~xc ~xs ~qemu_domid ~domain_type domid + ) + let s3resume t vm = (* XXX: TODO: monitor the guest's response; track the s3 state *) on_domain t vm (fun xc _xs _task _vm di -> From 88ece8963889d1b07b0a992e0138678c18657000 Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Tue, 2 Dec 2025 16:00:47 +0000 Subject: [PATCH 11/59] fixup! CP-31566 define xenopsd fast resume operation Signed-off-by: Christian Lindig --- ocaml/xenopsd/xc/domain.ml | 2 +- ocaml/xenopsd/xc/xenops_server_xen.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index cf124131bd2..57008388db7 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -1364,7 +1364,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn store_port local_stuff vm_stuff -let resume_post ~xc ~xs domid = +let resume_post ~xc:_ ~xs domid = let dom_path = xs.Xs.getdomainpath domid in let store_mfn_s = xs.Xs.read (dom_path ^ "/store/ring-ref") in let store_mfn = Nativeint.of_string store_mfn_s in diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 03a4c0b641f..8b4d0a4b40a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -3022,7 +3022,7 @@ module VM = struct ) let resume t vm = - on_domain t vm (fun xc xs task vm di -> + on_domain t vm (fun xc xs task _vm di -> let domid = di.Xenctrl.domid in let qemu_domid = this_domid ~xs in let domain_type = From f255a466d0147a0f8ad68d90876f6b97f77b837f Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Wed, 3 Dec 2025 15:55:41 +0000 Subject: [PATCH 12/59] increase max supported NVMe request size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current default value for the NVMe MDTS parameter exposed in QEMU emulated NMVe devices is 7 (max 512KiB requests). However there seems to be an internal Windows Server 2025 issue that possibly triggers when splitting bigger requests into smaller on in the NVMe Windows driver. 
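For reference, MDTS is expressed as a power-of-two multiple of the controller's minimum memory page size (typically 4KiB), so the two values discussed here work out as:

    max request size = 2^MDTS x 4KiB
    MDTS = 7 -> 2^7 x 4KiB = 512KiB
    MDTS = 9 -> 2^9 x 4KiB = 2MiB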
Increase the exposed MDTS value on the emulated QEMU NVMe device to 9 (max 2MiB request size), as that seems to drop the reproduction rate of the issue. Discussion is ongoing with Microsoft to get the issue identified and possibly sorted on their end. For the time being apply this mitigation in qemu-wrapper as a workaround. Signed-off-by: Roger Pau Monné Signed-off-by: Christian Lindig --- ocaml/xenopsd/xc/device.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 73f136feca4..9beeecf436b 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -2774,7 +2774,7 @@ module Backend = struct ] (* 4 and 5 are NICs, and we can only have two, 6 is platform *) - let extra_args = ["-device"; "nvme,serial=nvme0,id=nvme0,addr=7"] + let extra_args = ["-device"; "nvme,serial=nvme0,mdts=9,id=nvme0,addr=7"] end module XenPV = struct let addr ~xs:_ ~domid:_ _ ~nics:_ = 6 end From f905d643745945f4eb262973d40eabaeaeeba35d Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Wed, 3 Dec 2025 18:47:23 +0800 Subject: [PATCH 13/59] CA-420533: Only clear RestartVM guidance on up-to-date hosts During rolling pool upgrade (RPU), RestartVM guidance should only be cleared when a VM restarts on a host that has been updated to match the coordinator's software version. Previously, the guidance was cleared whenever a VM restarted, regardless of the host's update status. This commit ensures that RestartVM guidance persists until the VM restarts on an up-to-date host, this provides accurate feedback to administrators about which VMs still need restarting after RPU. Also adds unit tests covering 6 scenarios: * VM start on updated vs old host (via xenopsd) * Suspended VM resume on updated vs old host * VM halt on updated vs old host (via force_state_reset) Signed-off-by: Gang Ji --- ocaml/tests/test_xapi_xenops.ml | 216 +++++++++++++++++++++++++++++++- ocaml/xapi/xapi_vm_lifecycle.ml | 3 + ocaml/xapi/xapi_xenops.ml | 9 +- 3 files changed, 225 insertions(+), 3 deletions(-) diff --git a/ocaml/tests/test_xapi_xenops.ml b/ocaml/tests/test_xapi_xenops.ml index e1f1bf048e2..42f0bb5708d 100644 --- a/ocaml/tests/test_xapi_xenops.ml +++ b/ocaml/tests/test_xapi_xenops.ml @@ -3,6 +3,92 @@ open Test_common module D = Debug.Make (struct let name = "test_xapi_xenops" end) open D +module Date = Clock.Date + +(** Helper to create a Xenops VM state for testing *) +let make_xenops_state ~power_state ?(last_start_time = 0.0) () = + let open Xenops_interface.Vm in + { + power_state + ; domids= [0] + ; consoles= [] + ; memory_target= 0L + ; memory_actual= 0L + ; memory_limit= 0L + ; vcpu_target= 1 + ; shadow_multiplier_target= 1.0 + ; rtc_timeoffset= "" + ; uncooperative_balloon_driver= false + ; guest_agent= [] + ; xsdata_state= [] + ; pv_drivers_detected= false + ; last_start_time + ; hvm= false + ; nomigrate= false + ; nested_virt= false + ; domain_type= Domain_PV + ; featureset= "" + } + +(** Helper to set up VM for testing: sets pending guidances, resident host, and power state *) +let setup_vm_for_test ~__context ~vm ~guidances ~resident_on ~power_state = + Db.VM.set_pending_guidances ~__context ~self:vm ~value:guidances ; + Db.VM.set_resident_on ~__context ~self:vm ~value:resident_on ; + Db.VM.set_power_state ~__context ~self:vm ~value:power_state + +(** Helper to check pending guidances after an operation *) +let check_pending_guidances ~__context ~vm ~expect_restart_vm + ~expect_restart_device_model ~test_description = + let remaining = 
Db.VM.get_pending_guidances ~__context ~self:vm in + Alcotest.(check bool) + (Printf.sprintf "restart_vm guidance %s - %s" + (if expect_restart_vm then "present" else "cleared") + test_description + ) + expect_restart_vm + (List.mem `restart_vm remaining) ; + Alcotest.(check bool) + (Printf.sprintf "restart_device_model guidance %s - %s" + (if expect_restart_device_model then "present" else "cleared") + test_description + ) + expect_restart_device_model + (List.mem `restart_device_model remaining) + +(** Helper to simulate a VM state update via update_vm_internal *) +let simulate_vm_state_update ~__context ~vm ~previous_power_state + ~new_power_state ~localhost = + let previous_state = make_xenops_state ~power_state:previous_power_state () in + let new_state = + make_xenops_state ~power_state:new_power_state ~last_start_time:100.0 () + in + let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let metrics = Db.VM.get_metrics ~__context ~self:vm in + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:(Date.of_unix_time 50.0) ; + ignore + (Xapi_xenops.update_vm_internal ~__context ~id:vm_uuid ~self:vm + ~previous:(Some previous_state) ~info:(Some new_state) ~localhost + ) + +(** Helper to set host software version *) +let set_host_software_version ~__context ~host ~platform_version ~xapi_version = + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._platform_version ~value:platform_version ; + Db.Host.remove_from_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ; + Db.Host.add_to_software_version ~__context ~self:host + ~key:Xapi_globs._xapi_version ~value:xapi_version + +(** Helper to get the pool from the test database *) +let get_pool ~__context = + match Db.Pool.get_all ~__context with + | pool :: _ -> + pool + | [] -> + failwith "No pool found in test database" let simulator_setup = ref false @@ -187,4 +273,132 @@ let test_xapi_restart () = ) unsetup_simulator -let test = [("test_xapi_restart", `Quick, test_xapi_restart)] +(** Test that RestartVM guidance is only cleared when VM starts on up-to-date host *) +let test_pending_guidance_vm_start () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + + (* Set up VM guidances - both restart_vm and restart_device_model *) + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM starting on up-to-date host - should clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false + ~test_description:"VM started on up-to-date host" ; + 
+ (* Test 2: VM starting on old host - should NOT clear restart_vm *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Halted ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Halted + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:false + ~test_description:"VM started on old host" + +(** Test that NO guidance is cleared when suspended VM resumes *) +let test_pending_guidance_vm_resume () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + (* Test 1: Suspended VM resumed on up-to-date host - should NOT clear any guidance *) + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on up-to-date host" ; + + (* Test 2: Suspended VM resumed on old host - should NOT clear any guidance *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Suspended ; + simulate_vm_state_update ~__context ~vm + ~previous_power_state:Xenops_interface.Suspended + ~new_power_state:Xenops_interface.Running ~localhost:host2 ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:true + ~expect_restart_device_model:true + ~test_description:"suspended VM resumed on old host" + +(** Test that RestartVM guidance is always cleared when VM is halted *) +let test_pending_guidance_vm_halt () = + let __context = make_test_database () in + Context.set_test_rpc __context (Mock_rpc.rpc __context) ; + + let localhost = Helpers.get_localhost ~__context in + let host2 = make_host ~__context ~name_label:"host2" ~hostname:"host2" () in + + (* Set up software versions - localhost is up-to-date, host2 is not *) + set_host_software_version ~__context ~host:localhost ~platform_version:"1.2.3" + ~xapi_version:"4.5.6" ; + set_host_software_version ~__context ~host:host2 ~platform_version:"1.2.2" + ~xapi_version:"4.5.5" ; + + (* Set localhost as the pool coordinator *) + let pool = get_pool ~__context in + Db.Pool.set_master ~__context ~self:pool ~value:localhost ; + + let vm = make_vm ~__context () in + let guidances = [`restart_vm; `restart_device_model] in + + (* Test 1: VM halted on up-to-date host - should clear both guidances *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:localhost + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + 
~expect_restart_device_model:false + ~test_description:"VM halted on up-to-date host" ; + + (* Test 2: VM halted on old host - should ALSO clear both guidances + because VM.start_on will enforce host version check on next start *) + setup_vm_for_test ~__context ~vm ~guidances ~resident_on:host2 + ~power_state:`Running ; + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:`Halted ; + check_pending_guidances ~__context ~vm ~expect_restart_vm:false + ~expect_restart_device_model:false ~test_description:"VM halted on old host" + +let test = + [ + ("test_xapi_restart", `Quick, test_xapi_restart) + ; ("test_pending_guidance_vm_start", `Quick, test_pending_guidance_vm_start) + ; ("test_pending_guidance_vm_resume", `Quick, test_pending_guidance_vm_resume) + ; ("test_pending_guidance_vm_halt", `Quick, test_pending_guidance_vm_halt) + ] diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 14290421fb4..6d1ce9a537f 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -909,6 +909,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = (* Blank the requires_reboot flag *) Db.VM.set_requires_reboot ~__context ~self ~value:false ; remove_pending_guidance ~__context ~self ~value:`restart_device_model ; + (* Always remove RestartVM guidance when VM becomes Halted: VM.start_on checks + host version via assert_host_has_highest_version_in_pool, preventing the VM + from starting on an outdated host, so it will necessarily start on an up-to-date host *) remove_pending_guidance ~__context ~self ~value:`restart_vm ) ; (* Do not clear resident_on for VM and VGPU in a checkpoint operation *) diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 9b12bcec5a6..0ea29ea4cf7 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -2350,8 +2350,13 @@ let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = then ( Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self - ~value:`restart_vm + (* Only remove RestartVM guidance if host is up-to-date with coordinator *) + if + Helpers.Checks.RPU.are_host_versions_same_on_master ~__context + ~host:localhost + then + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm ) ) ; create_guest_metrics_if_needed () ; From bb705d1334f4e8df49dba1a7a3c732b236746f97 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:35:44 +0000 Subject: [PATCH 14/59] qcow-stream-tool: Add read_headers command It returns info on the allocated clusters in a JSON. 
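For reference, the JSON printed to stdout has the following shape (the values
below are made up for illustration only; the real output is a single compact
line produced by Yojson):

```
{"virtual_size": 10737418240, "cluster_bits": 16, "data_clusters": [0, 1, 7, 42]}
```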
Signed-off-by: Andrii Sultanov --- ocaml/qcow-stream-tool/dune | 4 ++ ocaml/qcow-stream-tool/qcow_stream_tool.ml | 69 +++++++++++++++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/ocaml/qcow-stream-tool/dune b/ocaml/qcow-stream-tool/dune index 4daf3469dc5..436dd58681c 100644 --- a/ocaml/qcow-stream-tool/dune +++ b/ocaml/qcow-stream-tool/dune @@ -7,5 +7,9 @@ qcow-stream cmdliner unix + lwt.unix + lwt + qcow-types + yojson ) ) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml index 7158867c248..41b57c9a366 100644 --- a/ocaml/qcow-stream-tool/qcow_stream_tool.ml +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -1,11 +1,53 @@ +open Cmdliner + module Impl = struct let stream_decode output = Qcow_stream.stream_decode Unix.stdin output ; `Ok () + + let read_headers qcow_path = + let open Lwt.Syntax in + let t = + let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in + let* virtual_size, cluster_bits, _, data_cluster_map = + Qcow_stream.start_stream_decode fd + in + let clusters = Qcow_types.Cluster.Map.bindings data_cluster_map in + let clusters = + List.map + (fun (_, virt_address) -> + let ( >> ) = Int64.shift_right_logical in + let address = + Int64.to_int (virt_address >> Int32.to_int cluster_bits) + in + `Int address + ) + clusters + in + let json = + `Assoc + [ + ("virtual_size", `Int (Int64.to_int virtual_size)) + ; ("cluster_bits", `Int (Int32.to_int cluster_bits)) + ; ("data_clusters", `List clusters) + ] + in + let json_string = Yojson.to_string json in + let* () = Lwt_io.print json_string in + let* () = Lwt_io.flush Lwt_io.stdout in + Lwt.return_unit + in + Lwt_main.run t ; `Ok () end module Cli = struct - open Cmdliner + let output default = + let doc = Printf.sprintf "Path to the output file." in + Arg.(value & pos 0 string default & info [] ~doc) + + let input = + let doc = Printf.sprintf "Path to the input file." in + Arg.(required & pos 0 (some string) None & info [] ~doc) let stream_decode_cmd = let doc = "decode qcow2 formatted data from stdin and write a raw image" in @@ -15,15 +57,28 @@ module Cli = struct ; `P "Decode qcow2 formatted data from stdin and write to a raw file." ] in - let output default = - let doc = Printf.sprintf "Path to the output file." in - Arg.(value & pos 0 string default & info [] ~doc) - in Cmd.v (Cmd.info "stream_decode" ~doc ~man) Term.(ret (const Impl.stream_decode $ output "test.raw")) - let main () = Cmd.eval stream_decode_cmd + let read_headers_cmd = + let doc = + "Determine allocated clusters by parsing qcow2 file at the provided \ + path. Returns JSON like the following: {'virtual_size': X, \ + 'cluster_bits': Y, 'data_clusters': [1,2,3]}" + in + let man = [`S "DESCRIPTION"; `P doc] in + Cmd.v + (Cmd.info "read_headers" ~doc ~man) + Term.(ret (const Impl.read_headers $ input)) + + let cmds = [stream_decode_cmd; read_headers_cmd] end -let () = exit (Cli.main ()) +let info = + let doc = "minimal CLI for qcow-stream" in + Cmd.info "qcow-stream-tool" ~version:"1.0.0" ~doc + +let () = + let cmd = Cmd.group info Cli.cmds in + exit (Cmd.eval cmd) From 5ec13cc5c23dcab31309dcfdd8f751a307db6d7f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:40:13 +0000 Subject: [PATCH 15/59] python3: Use pre-parsed cluster allocation data in qcow2-to-stdout On export, instead of reading the whole raw disk, consult the JSON (if provided), and only allocate the clusters that are present in the table. 
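As a rough sketch of the intended plumbing (xapi wires this up through file
descriptors, so the paths and redirections below are illustrative only), the
header JSON produced by `qcow-stream-tool read_headers` is piped to this
script on stdin together with the new `--json-header` flag:

```
qcow-stream-tool read_headers /path/to/leaf.qcow2 \
  | qcow2-to-stdout.py --json-header /dev/backing-device > exported.qcow2
```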
This is analogous to vhd-tool's handling of export, and greatly speeds up handling of sparse disks. Signed-off-by: Andrii Sultanov --- python3/libexec/qcow2-to-stdout.py | 135 ++++++++++++++++++++++++----- 1 file changed, 112 insertions(+), 23 deletions(-) diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py index b0638bc5904..4ce1cc72b56 100755 --- a/python3/libexec/qcow2-to-stdout.py +++ b/python3/libexec/qcow2-to-stdout.py @@ -24,6 +24,7 @@ # clusters. For the sake of simplicity the code sometimes talks about # refcount tables and L1 tables when referring to those clusters. +import json import argparse import math import os @@ -91,7 +92,9 @@ def write_features(cluster, offset, data_file_name): def write_qcow2_content(input_file, cluster_size, refcount_bits, - data_file_name, data_file_raw, diff_file_name): + data_file_name, data_file_raw, diff_file_name, + virtual_size, nonzero_clusters, + diff_virtual_size, diff_nonzero_clusters): # Some basic values l1_entries_per_table = cluster_size // 8 l2_entries_per_table = cluster_size // 8 @@ -102,8 +105,12 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, fd = os.open(input_file, os.O_RDONLY) # Virtual disk size, number of data clusters and L1 entries - block_device_size = os.lseek(fd, 0, os.SEEK_END) - disk_size = align_up(block_device_size, 512) + if virtual_size is None: + block_device_size = os.lseek(fd, 0, os.SEEK_END) + disk_size = align_up(block_device_size, 512) + else: + block_device_size = virtual_size + disk_size = virtual_size total_data_clusters = math.ceil(disk_size / cluster_size) l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) @@ -118,6 +125,28 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, allocated_l2_tables = 0 allocated_data_clusters = 0 + def allocate_cluster(idx): + nonlocal allocated_data_clusters + nonlocal allocated_l2_tables + bitmap_set(l2_bitmap, idx) + allocated_data_clusters += 1 + # Allocated data clusters also need their corresponding L1 entry and L2 table + l1_idx = math.floor(idx / l2_entries_per_table) + if not bitmap_is_set(l1_bitmap, l1_idx): + bitmap_set(l1_bitmap, l1_idx) + allocated_l2_tables += 1 + + # Allocates a cluster in the appropriate bitmaps if it's different + # from cluster_to_compare_with + def check_cluster_allocate(idx, cluster, cluster_to_compare_with): + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + # If a cluster has different data from the cluster_to_compare_with then it + # must be allocated in the output file and its L2 entry must be set + if cluster != cluster_to_compare_with: + allocate_cluster(idx) + if data_file_raw: # If data_file_raw is set then all clusters are allocated and # we don't need to read the input file at all. 
@@ -126,26 +155,39 @@ def write_qcow2_content(input_file, cluster_size, refcount_bits, bitmap_set(l1_bitmap, idx) for idx in range(total_data_clusters): bitmap_set(l2_bitmap, idx) - else: - # Allocates a cluster in the appropriate bitmaps if it's different - # from cluster_to_compare_with - def check_cluster_allocate(idx, cluster, cluster_to_compare_with): - nonlocal allocated_data_clusters - nonlocal allocated_l2_tables - # If the last cluster is smaller than cluster_size pad it with zeroes - if len(cluster) < cluster_size: - cluster += bytes(cluster_size - len(cluster)) - # If a cluster has different data from the cluster_to_compare_with then it - # must be allocated in the output file and its L2 entry must be set - if cluster != cluster_to_compare_with: - bitmap_set(l2_bitmap, idx) - allocated_data_clusters += 1 - # Allocated data clusters also need their corresponding L1 entry and L2 table - l1_idx = math.floor(idx / l2_entries_per_table) - if not bitmap_is_set(l1_bitmap, l1_idx): - bitmap_set(l1_bitmap, l1_idx) - allocated_l2_tables += 1 + elif nonzero_clusters is not None: + if diff_file_name: + if diff_virtual_size is None or diff_nonzero_clusters is None: + sys.exit("[Error] QCOW headers for the diff file were not provided.") + # Read all the clusters that differ from the diff_file_name + diff_fd = os.open(diff_file_name, os.O_RDONLY) + last_diff_cluster = align_up(diff_virtual_size, cluster_size) // cluster_size + # In case input_file is bigger than diff_file_name, first check + # if clusters from diff_file_name differ, and then check if the + # rest contain data + diff_nonzero_clusters_set = set(diff_nonzero_clusters) + for cluster in nonzero_clusters: + if cluster >= last_diff_cluster: + allocate_cluster(cluster) + elif cluster in diff_nonzero_clusters_set: + # If a cluster has different data from the original_cluster + # then it must be allocated + cluster_data = os.pread(fd, cluster_size, cluster_size * cluster) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * cluster) + check_cluster_allocate(cluster, cluster_data, original_cluster) + diff_nonzero_clusters_set.remove(cluster) + else: + allocate_cluster(cluster) + + # These are not present in the original file + for cluster in diff_nonzero_clusters_set: + allocate_cluster(cluster) + else: + for cluster in nonzero_clusters: + allocate_cluster(cluster) + + else: zero_cluster = bytes(cluster_size) last_cluster = align_up(block_device_size, cluster_size) // cluster_size if diff_file_name: @@ -384,11 +426,54 @@ def main(): help="enable data_file_raw on the generated image (implies -d)", action="store_true", ) + parser.add_argument( + "--json-header", + dest="json_header", + help="stdin contains a JSON of pre-parsed QCOW2 information" + "(virtual_size, data_clusters, cluster_bits)", + action="store_true", + ) + parser.add_argument( + "--json-header-diff", + dest="json_header_diff", + metavar="json_header_diff", + help="File descriptor that contains a JSON of pre-parsed QCOW2 " + "information for the diff_file_name", + type=int, + default=None, + ) args = parser.parse_args() if args.data_file_raw: args.data_file = True + virtual_size = None + nonzero_clusters = None + diff_virtual_size = None + diff_nonzero_clusters = None + if args.json_header: + json_header = json.load(sys.stdin) + try: + virtual_size = json_header['virtual_size'] + source_cluster_size = 2 ** json_header['cluster_bits'] + if source_cluster_size != args.cluster_size: + args.cluster_size = source_cluster_size + nonzero_clusters = 
json_header['data_clusters'] + except KeyError as e: + raise RuntimeError(f'Incomplete JSON - missing value for {str(e)}') from e + if args.json_header_diff: + f = os.fdopen(args.json_header_diff) + json_header = json.load(f) + try: + diff_virtual_size = json_header['virtual_size'] + if 2 ** json_header['cluster_bits'] == args.cluster_size: + diff_nonzero_clusters = json_header['data_clusters'] + else: + sys.exit(f"[Error] Cluster size in the files being compared are " + f"different: {2**json_header['cluster_bits']} vs. {args.cluster_size}") + except KeyError as e: + raise RuntimeError(f'Incomplete JSON for the diff - missing value for {str(e)}') from e + if not os.path.exists(args.input_file): sys.exit(f"[Error] {args.input_file} does not exist.") @@ -413,7 +498,11 @@ def main(): args.refcount_bits, data_file_name, args.data_file_raw, - args.diff_file_name + args.diff_file_name, + virtual_size, + nonzero_clusters, + diff_virtual_size, + diff_nonzero_clusters ) From 15f80881f08df8b1dc1244ffcff58c5f0af4c90d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:30:50 +0000 Subject: [PATCH 16/59] vhd_tool_wrapper: Make vhd_of_device generic Take the expected driver type as a parameter, to allow this helper to be used by qcow code as well. Signed-off-by: Andrii Sultanov --- ocaml/xapi/vhd_tool_wrapper.ml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ocaml/xapi/vhd_tool_wrapper.ml b/ocaml/xapi/vhd_tool_wrapper.ml index 73f25785eb8..f3f791fe251 100644 --- a/ocaml/xapi/vhd_tool_wrapper.ml +++ b/ocaml/xapi/vhd_tool_wrapper.ml @@ -149,25 +149,27 @@ let find_backend_device path = raise Not_found with _ -> None -(** [vhd_of_device path] returns (Some vhd) where 'vhd' is the vhd leaf backing a particular device [path] or None. - [path] may either be a blktap2 device *or* a blkfront device backed by a blktap2 device. If the latter then - the script must be run in the same domain as blkback. *) -let vhd_of_device path = +(** [backing_file_of_device path] returns (Some backing_file) where 'backing_file' + is the leaf backing a particular device [path] (with a driver of type + [driver] or None. [path] may either be a blktap2 device *or* a blkfront + device backed by a blktap2 device. If the latter then the script must be + run in the same domain as blkback. 
*) +let backing_file_of_device path ~driver = let tapdisk_of_path path = try match Tapctl.of_device (Tapctl.create ()) path with - | _, _, Some ("vhd", vhd) -> - Some vhd + | _, _, Some (typ, backing_file) when typ = driver -> + Some backing_file | _, _, _ -> raise Not_found with | Tapctl.Not_blktap -> ( debug "Device %s is not controlled by blktap" path ; - (* Check if it is a VHD behind a NBD deivce *) + (* Check if it is a [driver] behind a NBD device *) Stream_vdi.(get_nbd_device path |> image_behind_nbd_device) |> function - | Some ("vhd", vhd) -> - debug "%s is a VHD behind NBD device %s" vhd path ; - Some vhd + | Some (typ, backing_file) when typ = driver -> + debug "%s is a %s behind NBD device %s" backing_file driver path ; + Some backing_file | _ -> None ) @@ -182,6 +184,7 @@ let vhd_of_device path = let send progress_cb ?relative_to (protocol : string) (dest_format : string) (s : Unix.file_descr) (path : string) (size : Int64.t) (prefix : string) = + let vhd_of_device = backing_file_of_device ~driver:"vhd" in let s' = Uuidx.(to_string (make ())) in let source_format, source = match (Stream_vdi.get_nbd_device path, vhd_of_device path, relative_to) with From 368596819c29c1682b2d07528b979615a094de00 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 18 Nov 2025 08:42:33 +0000 Subject: [PATCH 17/59] qcow_tool_wrapper: Read headers of QCOW2-backed VDIs on export Pass the JSON output of read_headers into qcow2-to-stdout to handle the export further. Signed-off-by: Andrii Sultanov --- ocaml/xapi/qcow_tool_wrapper.ml | 55 +++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index 30d0eb63811..cd42ca123d3 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -16,14 +16,15 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D -let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit) - (args : string list) = +let run_qcow_tool qcow_tool ?(replace_fds = []) ?input_fd ?output_fd + (_progress_cb : int -> unit) (args : string list) = info "Executing %s %s" qcow_tool (String.concat " " args) ; let open Forkhelpers in match with_logfile_fd "qcow-tool" (fun log_fd -> let pid = - safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args + safe_close_and_exec input_fd output_fd (Some log_fd) replace_fds + qcow_tool args in let _, status = waitpid pid in if status <> Unix.WEXITED 0 then ( @@ -46,14 +47,56 @@ let update_task_progress (__context : Context.t) (x : int) = let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) = - let args = [path] in + let args = ["stream_decode"; path] in let qcow_tool = !Xapi_globs.qcow_stream_tool in run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd +let read_header qcow_path = + let args = ["read_headers"; qcow_path] in + let qcow_tool = !Xapi_globs.qcow_stream_tool in + let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in + + let progress_cb _ = () in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> run_qcow_tool qcow_tool progress_cb args ~output_fd:pipe_writer) + (fun () -> Unix.close pipe_writer) ; + pipe_reader + let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = + let qcow_of_device = + Vhd_tool_wrapper.backing_file_of_device ~driver:"qcow2" + in + let qcow_path = qcow_of_device path in + + (* If VDI is backed by QCOW, parse the header to determine 
nonzero clusters + to avoid reading all of the raw disk *) + let input_fd = Option.map read_header qcow_path in + + (* Parse the header of the VDI we are diffing against as well *) + let relative_to_qcow_path = Option.bind relative_to qcow_of_device in + let diff_fd = Option.map read_header relative_to_qcow_path in + + let unique_string = Uuidx.(to_string (make ())) in let args = - [path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi] + [path] + @ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi]) + @ ( match relative_to_qcow_path with + | None -> + [] + | Some _ -> + ["--json-header-diff"; unique_string] + ) + @ match qcow_path with None -> [] | Some _ -> ["--json-header"] in let qcow_tool = !Xapi_globs.qcow_to_stdout in - run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd + let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> + run_qcow_tool qcow_tool progress_cb args ?input_fd ~output_fd:unix_fd + ?replace_fds + ) + (fun () -> + Option.iter Unix.close input_fd ; + Option.iter Unix.close diff_fd + ) From 89140761d71277f233133d6d1f655ecfcbb16864 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 26 Nov 2025 10:44:29 +0000 Subject: [PATCH 18/59] qcow_tool_wrapper: Implement parse_header to determine allocated clusters Translates JSON from qcow-stream-tool to OCaml types. This is currently unused, but will be used in stream_vdi and vhd_tool_wrapper in the future. Signed-off-by: Andrii Sultanov --- ocaml/xapi/qcow_tool_wrapper.ml | 14 ++++++++++++++ ocaml/xapi/qcow_tool_wrapper.mli | 2 ++ 2 files changed, 16 insertions(+) diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml index cd42ca123d3..e3cd13d469b 100644 --- a/ocaml/xapi/qcow_tool_wrapper.ml +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -62,6 +62,20 @@ let read_header qcow_path = (fun () -> Unix.close pipe_writer) ; pipe_reader +let parse_header qcow_path = + let pipe_reader = read_header qcow_path in + let ic = Unix.in_channel_of_descr pipe_reader in + let buf = Buffer.create 4096 in + let json = Yojson.Basic.from_channel ~buf ~fname:"qcow_header.json" ic in + In_channel.close ic ; + let cluster_size = + 1 lsl Yojson.Basic.Util.(member "cluster_bits" json |> to_int) + in + let cluster_list = + Yojson.Basic.Util.(member "data_clusters" json |> to_list |> List.map to_int) + in + (cluster_size, cluster_list) + let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) (path : string) (_size : Int64.t) = let qcow_of_device = diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli index 51c3c626567..c1c4a6426af 100644 --- a/ocaml/xapi/qcow_tool_wrapper.mli +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -23,3 +23,5 @@ val send : -> string -> int64 -> unit + +val parse_header : string -> int * int list From 2cc325be0e05cdaa71b7043299739602c15839c2 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 10 Dec 2025 09:42:45 +0000 Subject: [PATCH 19/59] opam: add missing dependencies A merge yesterday added a dependency to yojson, codify it Signed-off-by: Pau Ruiz Safont --- dune-project | 1 + opam/qcow-stream-tool.opam | 1 + 2 files changed, 2 insertions(+) diff --git a/dune-project b/dune-project index 8d329288de3..93adbd1bd62 100644 --- a/dune-project +++ b/dune-project @@ -586,6 +586,7 @@ (depends qcow-stream cmdliner + yojson ) ) diff --git a/opam/qcow-stream-tool.opam b/opam/qcow-stream-tool.opam index 8090aec7a3f..c4a01535780 100644 --- 
a/opam/qcow-stream-tool.opam +++ b/opam/qcow-stream-tool.opam @@ -10,6 +10,7 @@ depends: [ "dune" {>= "3.20"} "qcow-stream" "cmdliner" + "yojson" "odoc" {with-doc} ] build: [ From 949f1dcf4d70ad97cad7fd4e807e8adbe8d8f920 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 11 Dec 2025 11:25:05 +0800 Subject: [PATCH 20/59] CA-420856: Re-read inventory file when resetting network Before resetting, the inventory file is probably updated. For example, during pool.eject, the "MANAGEMENT_INTERFACE" will be set to"". Re-reading the inventory file here can avoid reading stale data. Signed-off-by: Ming Lu --- ocaml/networkd/bin/network_server.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 65fa98d62d5..70da9691d7c 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -212,6 +212,7 @@ let reset_state () = ) ; None in + Inventory.reread_inventory () ; config := Network_config.read_management_conf reset_order let set_gateway_interface _dbg name = From 7c6ddfe579f1816b4e9cb636938c8136da4346e2 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 11 Dec 2025 12:03:18 +0000 Subject: [PATCH 21/59] opam: generate metadata for uuid with dune This also adds the missing dependency on ptime. This was added ages ago, but most of the time ptime is compiled before, so it's very rarely not available. Signed-off-by: Pau Ruiz Safont --- dune-project | 12 +++++++++- opam/uuid.opam | 49 +++++++++++++++++++++++------------------ opam/uuid.opam.template | 26 ---------------------- 3 files changed, 38 insertions(+), 49 deletions(-) diff --git a/dune-project b/dune-project index 93adbd1bd62..c5889ff42ac 100644 --- a/dune-project +++ b/dune-project @@ -594,7 +594,17 @@ (name varstored-guard)) (package - (name uuid)) + (name uuid) + (synopsis "Library used by xapi to generate database UUIDs") + (description + "This library allows xapi to use UUIDs with phantom types to avoid mixing UUIDs from different classes of objects. It's based on `uuidm`.") + (depends + (alcotest :with-test) + (fmt :with-test) + ptime + uuidm + ) +) (package (name stunnel) diff --git a/opam/uuid.opam b/opam/uuid.opam index 2fbe23bbd56..2cb6905f12d 100644 --- a/opam/uuid.opam +++ b/opam/uuid.opam @@ -1,30 +1,35 @@ # This file is generated by dune, edit dune-project instead -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -x-maintenance-intent: ["(latest)"] opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" +synopsis: "Library used by xapi to generate database UUIDs" +description: + "This library allows xapi to use UUIDs with phantom types to avoid mixing UUIDs from different classes of objects. It's based on `uuidm`." 
+maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] - -available: [ os = "linux" | os = "macos" ] +bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ - "ocaml" - "dune" {>= "3.15"} + "dune" {>= "3.20"} "alcotest" {with-test} "fmt" {with-test} + "ptime" "uuidm" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] ] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} +dev-repo: "git+https://github.com/xapi-project/xen-api.git" +x-maintenance-intent: ["(latest)"] +available: [ os = "linux" | os = "macos" ] diff --git a/opam/uuid.opam.template b/opam/uuid.opam.template index aacc8f63c2b..ce8a4cdd441 100644 --- a/opam/uuid.opam.template +++ b/opam/uuid.opam.template @@ -1,27 +1 @@ -opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] - available: [ os = "linux" | os = "macos" ] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "alcotest" {with-test} - "fmt" {with-test} - "uuidm" -] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} From a1ff026cd9cfc64fa1b07616a77f224b4d2a1aef Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 11 Dec 2025 13:51:14 +0000 Subject: [PATCH 22/59] libs: remove unused type parameters This becomes an error with ocaml 5.4 Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli b/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli index 286e545321f..a86104e313d 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-fdcaps/operations.mli @@ -47,7 +47,7 @@ val setup : unit -> unit (** {1 Static property tests} *) -val as_readable : (([< readable] as 'a), 'b) make -> ([> readable], 'b) make +val as_readable : ([< readable], 'b) make -> ([> readable], 'b) make (** [as_readable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. *) val as_writable : ([< writable], 'b) make -> ([> writable], 'b) make @@ -55,12 +55,10 @@ val as_writable : ([< writable], 'b) make -> ([> writable], 'b) make (** {1 Runtime property tests} *) -val as_readable_opt : - (([< rw] as 'a), 'b) make -> ([> readable], 'b) make option +val as_readable_opt : ([< rw], 'b) make -> ([> readable], 'b) make option (** [as_readable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. 
*) -val as_writable_opt : - (([< rw] as 'a), 'b) make -> ([> writable], 'b) make option +val as_writable_opt : ([< rw], 'b) make -> ([> writable], 'b) make option (** [as_writable_opt t] returns [Some t] when [t] is readable, and [None] otherwise. *) val as_spipe_opt : ('a, [< kind]) make -> ('a, [> espipe]) make option From 8e21f45feda7e13b2bc36be56f2a054f1854a61c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 11 Dec 2025 13:57:32 +0000 Subject: [PATCH 23/59] git-blame: ignore another formatting commit Signed-off-by: Pau Ruiz Safont --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 0b898836157..fd124656302 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -33,6 +33,7 @@ b12cf444edea15da6274975e1b2ca6a7fce2a090 364c27f5d18ab9dd31825e67a93efabecad06823 d8b4de9076531dd13bdffa20cc10c72290a52356 bdf06bca7534fbc0c4fc3cee3408a51a22615226 +eefc649e17086fbc200e4da114ea673825e79864 # ocp-indent d018d26d6acd4707a23288b327b49e44f732725e From cd50d44a8a80922071e67f6d3687202ca14bbb63 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 11 Dec 2025 14:24:45 +0000 Subject: [PATCH 24/59] sdk-gen: make code compatible with ocaml 5.4 The formatter record got a new field, override the necessary fields instead of replacing all of the fields, even if they are unchanged. Signed-off-by: Pau Ruiz Safont --- ocaml/sdk-gen/c/helper.ml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ocaml/sdk-gen/c/helper.ml b/ocaml/sdk-gen/c/helper.ml index 0079b42ef99..78e53b75b1e 100644 --- a/ocaml/sdk-gen/c/helper.ml +++ b/ocaml/sdk-gen/c/helper.ml @@ -28,19 +28,16 @@ let comment doc ?(indent = 0) s = let buf = Buffer.create 16 in let formatter = Format.formatter_of_buffer buf in let open Format in - let out, flush, newline, spaces = - let funcs = Format.pp_get_formatter_out_functions formatter () in - (funcs.out_string, funcs.out_flush, funcs.out_newline, funcs.out_spaces) - in - + let funcs = Format.pp_get_formatter_out_functions formatter () in + let original_out_newline = funcs.out_newline in let funcs = { - out_string= out - ; out_flush= flush - ; out_newline= - (fun () -> out (Printf.sprintf "\n%s * " indent_str) 0 (indent + 4)) - ; out_spaces= spaces - ; out_indent= spaces + funcs with + out_newline= + (fun () -> + funcs.out_string (Printf.sprintf "\n%s * " indent_str) 0 (indent + 4) + ) + ; out_indent= funcs.out_spaces } in Format.pp_set_formatter_out_functions formatter funcs ; @@ -61,7 +58,7 @@ let comment doc ?(indent = 0) s = Format.fprintf formatter "%!" ; Format.pp_set_formatter_out_functions formatter - {funcs with out_newline= newline} ; + {funcs with out_newline= original_out_newline} ; let result = Buffer.contents buf in let n = String.length result in From 2686b018eb18672f55bf01131d8c62e71be10b40 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 12 Dec 2025 11:25:24 +0000 Subject: [PATCH 25/59] CA-421991: Fix QEMU coredumps on XS9 On XS8, systemd's coredump handler ignores the process core rlimit so QEMU coredumps work correctly. On XS9, systemd's coredump handler respects the process core rlimit so QEMU coredumps end up truncated to 64 MiB which prevents gdb from loading them correctly. Fix this by removing the setting of the core rlimit from the wrapper so that it behaves like XS8. This is fine since the systemd coredump handler ensures that the root fs will not run out of space. 
Signed-off-by: Ross Lagerwall --- ocaml/xenopsd/scripts/qemu-wrapper | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index b1d811e7126..554590d0713 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -65,16 +65,6 @@ def restrict_fsize(): limit = 1024 * 1024 setrlimit(RLIMIT_FSIZE, (limit, limit)) -def enable_core_dumps(): - - limit = 64 * 1024 * 1024 - oldlimits = getrlimit(RLIMIT_CORE) - hardlimit = oldlimits[1] - if limit > hardlimit: - hardlimit = limit - setrlimit(RLIMIT_CORE, (limit, hardlimit)) - return limit - def xenstore_read(path): return xenstore.read("", path) @@ -117,9 +107,6 @@ def prepare_exec(): print("Warning: writing pid to '%s' cgroup.procs file: %s" \ % (cgroup_slice, e)) - core_dump_limit = enable_core_dumps() - print("core dump limit: %d" % core_dump_limit) - if not file_serial: restrict_fsize() From 2e46250ac935599e6ffa98c14f4460f682ce3a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 15 Dec 2025 10:18:55 +0000 Subject: [PATCH 26/59] CA-421914: preserve Host.numa_affinity_policy across pool join MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `numa-placement` was true (the default in XS9), configuring a host's `numa-affinity-policy` to `any` got lost during pool join. This is because `create_params` is maintained by hand and needs to be extended every time we add a new field to the datamodel. Update `create_params` to propagate `numa_affinity_policy` on pool join. Before pool join: ``` [root@genuk-21-09d ~]# xe host-list params=all|grep numa numa-affinity-policy ( RW): any ``` After pool join: ``` [root@genuk-21-09d ~]# xe host-list params=all|grep numa numa-affinity-policy ( RW): any numa-affinity-policy ( RW): any ``` (prior to this bugfix the newly joined host would revert to default_policy). The correct policy is also set in xenopsd: ``` 2025-12-15T15:12:09.029478+00:00 genuk-21-09d xenopsd-xc: [ info||134 |host.set_numa_affinity_policy R:99aecf4c494f|xenops_server] Enforcing 'any' NUMA affinity policy ``` Signed-off-by: Edwin Török --- ocaml/idl/datamodel_host.ml | 41 ++++++++++++++++++------------- ocaml/tests/common/test_common.ml | 2 +- ocaml/tests/test_host.ml | 2 +- ocaml/xapi/dbsync_slave.ml | 2 +- ocaml/xapi/xapi_host.ml | 5 ++-- ocaml/xapi/xapi_host.mli | 1 + ocaml/xapi/xapi_pool.ml | 1 + 7 files changed, 31 insertions(+), 23 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index f35c6e95103..20a96599622 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1209,6 +1209,23 @@ let license_remove = to the unlicensed edition" ~allowed_roles:_R_POOL_OP () +let host_numa_affinity_policy = + Enum + ( "host_numa_affinity_policy" + , [ + ("any", "VMs are spread across all available NUMA nodes") + ; ( "best_effort" + , "VMs are placed on the smallest number of NUMA nodes that they fit \ + using soft-pinning, but the policy doesn't guarantee a balanced \ + placement, falling back to the 'any' policy." 
+ ) + ; ( "default_policy" + , "Use the NUMA affinity policy that is the default for the current \ + version" + ) + ] + ) + let create_params = [ { @@ -1406,6 +1423,13 @@ let create_params = ; param_release= numbered_release "25.38.0-next" ; param_default= Some (VBool false) } + ; { + param_type= host_numa_affinity_policy + ; param_name= "numa_affinity_policy" + ; param_doc= "NUMA-aware VM memory and vCPU placement policy" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VEnum "default_policy") + } ] let create = @@ -2311,23 +2335,6 @@ let cleanup_pool_secret = ] ~allowed_roles:_R_LOCAL_ROOT_ONLY ~hide_from_docs:true () -let host_numa_affinity_policy = - Enum - ( "host_numa_affinity_policy" - , [ - ("any", "VMs are spread across all available NUMA nodes") - ; ( "best_effort" - , "VMs are placed on the smallest number of NUMA nodes that they fit \ - using soft-pinning, but the policy doesn't guarantee a balanced \ - placement, falling back to the 'any' policy." - ) - ; ( "default_policy" - , "Use the NUMA affinity policy that is the default for the current \ - version" - ) - ] - ) - let set_numa_affinity_policy = call ~name:"set_numa_affinity_policy" ~lifecycle:[] ~doc:"Set VM placement NUMA affinity policy" diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7fc190f43c7..de5541878a9 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -184,7 +184,7 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ~software_version:(Xapi_globs.software_version ()) - ~https_only + ~https_only ~numa_affinity_policy:`default_policy in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index 5dda1798413..6644c4a0f31 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -27,7 +27,7 @@ let add_host __context name = ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false ~software_version:(Xapi_globs.software_version ()) - ~https_only:false + ~https_only:false ~numa_affinity_policy:`default_policy ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index ff325b7259e..faec161097e 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -66,7 +66,7 @@ let create_localhost ~__context info = ~console_idle_timeout:Constants.default_console_idle_timeout ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default ~secure_boot:false ~software_version:[] - ~https_only:!Xapi_globs.https_only + ~https_only:!Xapi_globs.https_only ~numa_affinity_policy:`default_policy in () diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index b9f105610dd..2d38db88d1a 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1029,7 +1029,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode - ~secure_boot ~software_version ~https_only = + ~secure_boot ~software_version ~https_only ~numa_affinity_policy = (* fail-safe. 
We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1073,8 +1073,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~name_label ~uuid ~other_config:[] ~capabilities:[] ~cpu_configuration:[] (* !!! FIXME hard coding *) ~cpu_info:[] ~chipset_info ~memory_overhead:0L - ~sched_policy:"credit" (* !!! FIXME hard coding *) - ~numa_affinity_policy:`default_policy + ~sched_policy:"credit" (* !!! FIXME hard coding *) ~numa_affinity_policy ~supported_bootloaders:(List.map fst Xapi_globs.supported_bootloaders) ~suspend_image_sr:Ref.null ~crash_dump_sr:Ref.null ~logging:[] ~hostname ~address ~metrics ~license_params ~boot_free_mem:0L ~ha_statefiles:[] diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index b20f4ef3fe9..5ed6b362e02 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -139,6 +139,7 @@ val create : -> secure_boot:bool -> software_version:(string * string) list -> https_only:bool + -> numa_affinity_policy:API.host_numa_affinity_policy -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 752d822135f..fe0b11cbb76 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1062,6 +1062,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~secure_boot:host.API.host_secure_boot ~software_version:host.API.host_software_version ~https_only:host.API.host_https_only + ~numa_affinity_policy:host.API.host_numa_affinity_policy in (* Copy other-config into newly created host record: *) no_exn From fb66dfc030d3b5c007bacbcaecce75fa0cab96d1 Mon Sep 17 00:00:00 2001 From: Marcus Granado Date: Wed, 10 Dec 2025 21:21:35 +0000 Subject: [PATCH 27/59] CA-421847: set vcpu affinity if node claim succeeded Signed-off-by: Marcus Granado --- ocaml/xenopsd/xc/domain.ml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 0daf8eb78fb..6c65d467f33 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -967,17 +967,26 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = Array.map2 NUMAResource.min_memory (Array.of_seq nodes) a in numa_resources := Some nodea ; - let memory_plan = + let cpu_affinity, memory_plan = match Softaffinity.plan ~vm host nodea with | None -> D.debug "NUMA-aware placement failed for domid %d" domid ; - [] + (None, []) | Some (cpu_affinity, mem_plan) -> + (Some cpu_affinity, mem_plan) + in + let set_vcpu_affinity = function + | None -> + D.debug "%s: not setting vcpu affinity for domain %d" __FUNCTION__ + domid + | Some cpu_affinity -> + D.debug "%s: setting vcpu affinity for domain %d: %s" __FUNCTION__ + domid + (Fmt.to_to_string CPUSet.pp_dump cpu_affinity) ; let cpus = CPUSet.to_mask cpu_affinity in for i = 0 to vcpus - 1 do set_affinity affinity xcext domid i cpus - done ; - mem_plan + done in (* Xen only allows a single node when using memory claims, or none at all. 
*) let* numa_node, node = @@ -994,6 +1003,7 @@ let numa_placement domid ~vcpus ~cores ~memory affinity = let nr_pages = Int64.div memory 4096L |> Int64.to_int in try Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; + set_vcpu_affinity cpu_affinity ; Some (node, memory) with | Xenctrlext.Not_available -> From 06c1d620ce71a3bd7ea699d753aaef0b5e7c271b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 15 Dec 2025 17:25:54 +0000 Subject: [PATCH 28/59] CA-422071: preserve latest_synced_updates_applied and pending_guidances* across pool join MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/idl/datamodel_host.ml | 66 ++++++++++++++++++++++--------- ocaml/tests/common/test_common.ml | 2 + ocaml/tests/test_host.ml | 2 + ocaml/xapi/dbsync_slave.ml | 2 + ocaml/xapi/xapi_host.ml | 8 ++-- ocaml/xapi/xapi_host.mli | 3 ++ ocaml/xapi/xapi_pool.ml | 5 +++ 7 files changed, 67 insertions(+), 21 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index 20a96599622..38bf68a60d0 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1226,6 +1226,24 @@ let host_numa_affinity_policy = ] ) +let latest_synced_updates_applied_state = + Enum + ( "latest_synced_updates_applied_state" + , [ + ( "yes" + , "The host is up to date with the latest updates synced from remote \ + CDN" + ) + ; ( "no" + , "The host is outdated with the latest updates synced from remote CDN" + ) + ; ( "unknown" + , "If the host is up to date with the latest updates synced from \ + remote CDN is unknown" + ) + ] + ) + let create_params = [ { @@ -1430,6 +1448,36 @@ let create_params = ; param_release= numbered_release "25.39.0-next" ; param_default= Some (VEnum "default_policy") } + ; { + param_type= latest_synced_updates_applied_state + ; param_name= "latest_synced_updates_applied" + ; param_doc= + "Default as 'unknown', 'yes' if the host is up to date with updates \ + synced from remote CDN, otherwise 'no'" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } + ; { + param_type= Set update_guidances + ; param_name= "pending_guidances_full" + ; param_doc= + "The set of pending full guidances after applying updates, which a \ + user should follow to make some updates, e.g. specific hardware \ + drivers or CPU features, fully effective, but the 'average user' \ + doesn't need to" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } + ; { + param_type= Set update_guidances + ; param_name= "pending_guidances_recommended" + ; param_doc= + "The set of pending recommended guidances after applying updates, \ + which most users should follow to make the updates effective, but if \ + not followed, will not cause a failure" + ; param_release= numbered_release "25.39.0-next" + ; param_default= Some (VSet []) + } ] let create = @@ -2542,24 +2590,6 @@ let update_firewalld_service_status = status." 
~allowed_roles:_R_POOL_OP () -let latest_synced_updates_applied_state = - Enum - ( "latest_synced_updates_applied_state" - , [ - ( "yes" - , "The host is up to date with the latest updates synced from remote \ - CDN" - ) - ; ( "no" - , "The host is outdated with the latest updates synced from remote CDN" - ) - ; ( "unknown" - , "If the host is up to date with the latest updates synced from \ - remote CDN is unknown" - ) - ] - ) - let get_tracked_user_agents = call ~name:"get_tracked_user_agents" ~lifecycle:[] ~doc: diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index de5541878a9..4a4db92dd0d 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -185,6 +185,8 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ~console_idle_timeout ~ssh_auto_mode ~secure_boot ~software_version:(Xapi_globs.software_version ()) ~https_only ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index 6644c4a0f31..9e7ac61275c 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -28,6 +28,8 @@ let add_host __context name = ~console_idle_timeout:0L ~ssh_auto_mode:false ~secure_boot:false ~software_version:(Xapi_globs.software_version ()) ~https_only:false ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index faec161097e..d66448be62a 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -67,6 +67,8 @@ let create_localhost ~__context info = ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default ~secure_boot:false ~software_version:[] ~https_only:!Xapi_globs.https_only ~numa_affinity_policy:`default_policy + ~latest_synced_updates_applied:`unknown ~pending_guidances_full:[] + ~pending_guidances_recommended:[] in () diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 2d38db88d1a..0689a00e386 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1029,7 +1029,9 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode - ~secure_boot ~software_version ~https_only ~numa_affinity_policy = + ~secure_boot ~software_version ~https_only ~numa_affinity_policy + ~latest_synced_updates_applied ~pending_guidances_full + ~pending_guidances_recommended = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1090,8 +1092,8 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:"" ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] ~tls_verification_enabled ~last_software_update ~last_update_hash - ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown - ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~ssh_enabled + ~recommended_guidances:[] ~latest_synced_updates_applied + ~pending_guidances_recommended ~pending_guidances_full ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ~secure_boot ; (* If the host we're creating is us, make sure its set to live *) diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index 5ed6b362e02..3260ff3166e 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -140,6 +140,9 @@ val create : -> software_version:(string * string) list -> https_only:bool -> numa_affinity_policy:API.host_numa_affinity_policy + -> latest_synced_updates_applied:API.latest_synced_updates_applied_state + -> pending_guidances_full:API.update_guidances_set + -> pending_guidances_recommended:API.update_guidances_set -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index fe0b11cbb76..592f53615e5 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1063,6 +1063,11 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~software_version:host.API.host_software_version ~https_only:host.API.host_https_only ~numa_affinity_policy:host.API.host_numa_affinity_policy + ~latest_synced_updates_applied: + host.API.host_latest_synced_updates_applied + ~pending_guidances_full:host.API.host_pending_guidances_full + ~pending_guidances_recommended: + host.API.host_pending_guidances_recommended in (* Copy other-config into newly created host record: *) no_exn From bd8c79ad614d54b7dd94f2bc6bb6b40fafa39a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 15 Dec 2025 15:32:57 +0000 Subject: [PATCH 29/59] CA-422071: add unit test for Host.create_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every newly added field must have an entry in Host.create_params, otherwise these settings could be lost on pool join. 
Signed-off-by: Edwin Török --- ocaml/idl/dune | 4 ++-- ocaml/idl/test_host.ml | 31 +++++++++++++++++++++++++++++++ ocaml/idl/test_host.mli | 0 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 ocaml/idl/test_host.ml create mode 100644 ocaml/idl/test_host.mli diff --git a/ocaml/idl/dune b/ocaml/idl/dune index bc22a311cd7..ac591ae1e0f 100644 --- a/ocaml/idl/dune +++ b/ocaml/idl/dune @@ -64,9 +64,9 @@ ) (tests - (names schematest test_datetimes) + (names schematest test_datetimes test_host) (modes exe) - (modules schematest test_datetimes) + (modules schematest test_datetimes test_host) (libraries astring rpclib.core diff --git a/ocaml/idl/test_host.ml b/ocaml/idl/test_host.ml new file mode 100644 index 00000000000..b70a9cb72dc --- /dev/null +++ b/ocaml/idl/test_host.ml @@ -0,0 +1,31 @@ +module DT = Datamodel_types +module FieldSet = Astring.String.Set + +let recent_field (f : DT.field) = f.lifecycle.transitions = [] + +let rec field_full_names = function + | DT.Field f -> + if recent_field f then + f.full_name |> String.concat "_" |> Seq.return + else + Seq.empty + | DT.Namespace (_, xs) -> + xs |> List.to_seq |> Seq.concat_map field_full_names + +let () = + let create_params = + Datamodel_host.create_params + |> List.map (fun p -> p.DT.param_name) + |> FieldSet.of_list + and fields = + Datamodel_host.t.contents + |> List.to_seq + |> Seq.concat_map field_full_names + |> FieldSet.of_seq + in + let missing_in_create_params = FieldSet.diff fields create_params in + if not (FieldSet.is_empty missing_in_create_params) then ( + Format.eprintf "Missing fields in create_params: %a@." FieldSet.dump + missing_in_create_params ; + exit 1 + ) diff --git a/ocaml/idl/test_host.mli b/ocaml/idl/test_host.mli new file mode 100644 index 00000000000..e69de29bb2d From f8ecfbd412c14e820282305fb3105e926eabae9b Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 11 Nov 2025 16:07:43 +0000 Subject: [PATCH 30/59] libs: Add token-bucket library The token bucket library implements the token bucket algorithm, to be used for rate-limiting. This commit implements basic token buckets, which contain tokens that are refilled over time according to their refill parameter, up to a maximum determined by the burst parameter. Tokens can be consumed in a thread-safe way - consuming returns false when there are not enough tokens available, and true when the operation was successful. 
Signed-off-by: Christian Pardillo Laursen --- dune-project | 7 +++ ocaml/libs/rate-limit/dune | 7 +++ ocaml/libs/rate-limit/token_bucket.ml | 66 ++++++++++++++++++++++ ocaml/libs/rate-limit/token_bucket.mli | 76 ++++++++++++++++++++++++++ opam/rate-limit.opam | 31 +++++++++++ 5 files changed, 187 insertions(+) create mode 100644 ocaml/libs/rate-limit/dune create mode 100644 ocaml/libs/rate-limit/token_bucket.ml create mode 100644 ocaml/libs/rate-limit/token_bucket.mli create mode 100644 opam/rate-limit.opam diff --git a/dune-project b/dune-project index c5889ff42ac..d855636c0ac 100644 --- a/dune-project +++ b/dune-project @@ -52,6 +52,13 @@ (name tgroup) (depends xapi-log xapi-stdext-unix)) +(package + (name rate-limit) + (synopsis "Simple token bucket-based rate-limiting") + (depends + (ocaml (>= 4.12)) + xapi-log xapi-stdext-unix)) + (package (name xml-light2)) diff --git a/ocaml/libs/rate-limit/dune b/ocaml/libs/rate-limit/dune new file mode 100644 index 00000000000..3436c398228 --- /dev/null +++ b/ocaml/libs/rate-limit/dune @@ -0,0 +1,7 @@ +(library + (name rate_limit) + (public_name rate-limit) + (libraries threads.posix mtime mtime.clock.os xapi-log xapi-stdext-threads clock) +) + + diff --git a/ocaml/libs/rate-limit/token_bucket.ml b/ocaml/libs/rate-limit/token_bucket.ml new file mode 100644 index 00000000000..8ccc047b306 --- /dev/null +++ b/ocaml/libs/rate-limit/token_bucket.ml @@ -0,0 +1,66 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +type t = { + burst_size: float + ; fill_rate: float + ; mutable tokens: float + ; mutable last_refill: Mtime.span + ; mutex: Mutex.t +} + +let create_with_timestamp timestamp ~burst_size ~fill_rate = + { + burst_size + ; fill_rate + ; tokens= burst_size + ; last_refill= timestamp + ; mutex= Mutex.create () + } + +let create = create_with_timestamp (Mtime_clock.elapsed ()) + +let peek_with_timestamp timestamp tb = + let time_delta = Mtime.Span.abs_diff tb.last_refill timestamp in + let time_delta_seconds = Mtime.Span.to_float_ns time_delta *. 1e-9 in + min tb.burst_size (tb.tokens +. (time_delta_seconds *. tb.fill_rate)) + +let peek tb = peek_with_timestamp (Mtime_clock.elapsed ()) tb + +let consume_with_timestamp get_time tb amount = + let do_consume () = + let timestamp = get_time () in + let new_tokens = peek_with_timestamp timestamp tb in + tb.last_refill <- timestamp ; + if new_tokens >= amount then ( + tb.tokens <- new_tokens -. amount ; + true + ) else ( + tb.tokens <- new_tokens ; + false + ) + in + with_lock tb.mutex do_consume + +let consume = consume_with_timestamp Mtime_clock.elapsed + +let delay_until_available_timestamp timestamp tb amount = + let current_tokens = peek_with_timestamp timestamp tb in + let required_tokens = max 0. (amount -. current_tokens) in + required_tokens /. 
tb.fill_rate + +let delay_until_available tb amount = + delay_until_available_timestamp (Mtime_clock.elapsed ()) tb amount diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli new file mode 100644 index 00000000000..1379fa64c58 --- /dev/null +++ b/ocaml/libs/rate-limit/token_bucket.mli @@ -0,0 +1,76 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type t + +val create_with_timestamp : + Mtime.span -> burst_size:float -> fill_rate:float -> t +(** Create token bucket with given parameters and supplied inital timestamp + @param timestamp Initial timestamp + @param burst_size Maximum number of tokens that can fit in the bucket + @param fill_rate Number of tokens added to the bucket per second + *) + +val create : burst_size:float -> fill_rate:float -> t +(** Create token bucket with given parameters. + @param burst_size Maximum number of tokens that can fit in the bucket + @param fill_rate Number of tokens added to the bucket per second + *) + +val peek_with_timestamp : Mtime.span -> t -> float +(** Retrieve token amount in token bucket at given timestamp. + Undefined behaviour when [timestamp] <= [tb.timestamp] + @param timestamp Current time + @param tb Token bucket + @return Amount of tokens in the token bucket + *) + +val peek : t -> float +(** Retrieve current token amount + @param tb Token bucket + @return Amount of tokens in the token bucket + *) + +val consume_with_timestamp : (unit -> Mtime.span) -> t -> float -> bool +(** Consume tokens from the bucket in a thread-safe manner, using supplied + function for obtaining the current time + @param get_time Function to obtain timestamp, e.g. Mtime_clock.elapsed + @param tb Token bucket + @param amount How many tokens to consume + @return Whether the tokens were successfully consumed + *) + +val consume : t -> float -> bool +(** Consume tokens from the bucket in a thread-safe manner. 
+ @param tb Token bucket + @param amount How many tokens to consume + @return Whether the tokens were successfully consumed + *) + +val delay_until_available_timestamp : Mtime.span -> t -> float -> float +(** Get number of seconds that need to pass until bucket is expected to have + enough tokens to fulfil the request + @param timestamp + @param tb Token bucket + @param amount How many tokens we want to consume + @return Number of seconds until tokens are available +*) + +val delay_until_available : t -> float -> float +(** Get number of seconds that need to pass until bucket is expected to have + enough tokens to fulfil the request + @param tb Token bucket + @param amount How many tokens we want to consume + @return Number of seconds until tokens are available +*) diff --git a/opam/rate-limit.opam b/opam/rate-limit.opam new file mode 100644 index 00000000000..e5114dc41fb --- /dev/null +++ b/opam/rate-limit.opam @@ -0,0 +1,31 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Simple token bucket-based rate-limiting" +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.20"} + "ocaml" {>= "4.12"} + "xapi-log" + "xapi-stdext-unix" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" +x-maintenance-intent: ["(latest)"] From e3bd061a9a5fca1ecf4ba13b8d83624ac4b8c258 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 27 Nov 2025 13:51:28 +0000 Subject: [PATCH 31/59] rate-limit: Test token bucket Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/test/dune | 4 + .../libs/rate-limit/test/test_token_bucket.ml | 397 ++++++++++++++++++ .../rate-limit/test/test_token_bucket.mli | 0 3 files changed, 401 insertions(+) create mode 100644 ocaml/libs/rate-limit/test/dune create mode 100644 ocaml/libs/rate-limit/test/test_token_bucket.ml create mode 100644 ocaml/libs/rate-limit/test/test_token_bucket.mli diff --git a/ocaml/libs/rate-limit/test/dune b/ocaml/libs/rate-limit/test/dune new file mode 100644 index 00000000000..719cb056013 --- /dev/null +++ b/ocaml/libs/rate-limit/test/dune @@ -0,0 +1,4 @@ +(test + (name test_token_bucket) + (package rate-limit) + (libraries rate_limit alcotest qcheck-core qcheck-alcotest mtime mtime.clock.os fmt xapi-log threads.posix)) diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml new file mode 100644 index 00000000000..e0cb8d69dc2 --- /dev/null +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -0,0 +1,397 @@ +open Thread +open Rate_limit +open QCheck + +let test_consume_removes_correct_amount () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + in + + Alcotest.(check (float 0.0)) + "Initial tokens should be burst_size" 10.0 + (Token_bucket.peek_with_timestamp initial_time tb) ; + + let consume_time = Mtime.Span.of_uint64_ns 1_000_000_000L in + let success = + Token_bucket.consume_with_timestamp (fun () -> consume_time) tb 3.0 + in + Alcotest.(check bool) "Consume 3 tokens should succeed" true success ; + Alcotest.(check (float 0.0)) + 
"After consume, tokens should be 7" 7.0 + (Token_bucket.peek_with_timestamp consume_time tb) + +let test_consume_more_than_available () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size:5.0 + ~fill_rate:1.0 + in + + let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 4.0 in + + let consume_time = Mtime.Span.of_uint64_ns 1_000_000_000L in + let success = + Token_bucket.consume_with_timestamp (fun () -> consume_time) tb 10.0 + in + Alcotest.(check bool) "Consume more than available should fail" false success ; + Alcotest.(check (float 0.0)) + "After failed consume, tokens should be 2" 2.0 + (Token_bucket.peek_with_timestamp consume_time tb) + +let test_consume_refills_before_removing () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + in + + let first_consume = + Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 5.0 + in + Alcotest.(check bool) "First consume should succeed" true first_consume ; + + let later_time = Mtime.Span.of_uint64_ns 3_000_000_000L in + let second_consume = + Token_bucket.consume_with_timestamp (fun () -> later_time) tb 8.0 + in + + Alcotest.(check bool) + "Second consume after refill should succeed" true second_consume ; + + Alcotest.(check (float 0.0)) + "After refill and consume, tokens should be 2" 2.0 + (Token_bucket.peek_with_timestamp later_time tb) + +let test_peek_respects_burst_size () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:5.0 + in + + let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 8.0 in + + let later_time = Mtime.Span.of_uint64_ns 10_000_000_000L in + let available = Token_bucket.peek_with_timestamp later_time tb in + Alcotest.(check (float 0.0)) + "Peek should respect burst_size limit" 10.0 available + +let test_concurrent_access () = + let tb = + Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:15.0 + ~fill_rate:0.0 + in + let threads = + Array.init 10 (fun _ -> + create + (fun () -> + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb 1.0 + ) + () + ) + in + Array.iter Thread.join threads ; + Alcotest.(check (float 0.0)) + "Threads consuming concurrently should all remove from token amount" + (Token_bucket.peek_with_timestamp Mtime.Span.zero tb) + 5.0 + +let test_sleep () = + let tb = Token_bucket.create ~burst_size:20.0 ~fill_rate:5.0 in + let _ = Token_bucket.consume tb 10.0 in + Thread.delay 1.0 ; + Alcotest.(check (float 0.2)) + "Sleep 1 should refill token bucket by fill_rate" 15.0 (Token_bucket.peek tb) + +let test_system_time_versions () = + let tb = Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0 in + + let initial_peek = Token_bucket.peek tb in + Alcotest.(check (float 0.01)) + "System time peek should return burst_size initially" 10.0 initial_peek ; + + let consume_result = Token_bucket.consume tb 3.0 in + Alcotest.(check bool) "System time consume should succeed" true consume_result ; + + let after_consume_peek = Token_bucket.peek tb in + Alcotest.(check (float 0.01)) + "After consume, should have 7 tokens" 7.0 after_consume_peek + +let test_concurrent_system_time () = + let tb = Token_bucket.create ~burst_size:100.0 ~fill_rate:10.0 in + let num_threads = 20 in + let consume_per_thread = 3 in + + let threads = + Array.init num_threads (fun _ -> + create + (fun () -> + 
for _ = 1 to consume_per_thread do + ignore (Token_bucket.consume tb 1.0) + done + ) + () + ) + in + Array.iter Thread.join threads ; + + let remaining = Token_bucket.peek tb in + let expected_remaining = + 100.0 -. float_of_int (num_threads * consume_per_thread) + in + Alcotest.(check (float 0.1)) + "Concurrent system time consumption should work correctly" + expected_remaining remaining + +let test_consume_more_than_available_concurrent () = + let tb = + Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:0.0 + in + let num_threads = 10 in + let consume_per_thread = 1 in + let successful_consumes = ref 0 in + let counter_mutex = Mutex.create () in + + let threads = + Array.init num_threads (fun _ -> + create + (fun () -> + let success = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb + (float_of_int consume_per_thread) + in + if success then ( + Mutex.lock counter_mutex ; + incr successful_consumes ; + Mutex.unlock counter_mutex + ) + ) + () + ) + in + Array.iter Thread.join threads ; + + Alcotest.(check int) + "Only 5 consumptions should succeed" 5 !successful_consumes ; + Alcotest.(check (float 0.0)) + "Bucket should be empty after consumptions" 0.0 + (Token_bucket.peek_with_timestamp Mtime.Span.zero tb) + +let test_delay_until_available () = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + in + + let _ = + Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 10.0 + in + + let delay = + Token_bucket.delay_until_available_timestamp initial_time tb 4.0 + in + Alcotest.(check (float 0.01)) + "Delay for 4 tokens at 2 tokens/sec should be 2 seconds" 2.0 delay ; + + let tb_fresh = Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0 in + let _ = Token_bucket.consume tb_fresh 10.0 in + let delay_system = Token_bucket.delay_until_available tb_fresh 4.0 in + + Alcotest.(check (float 0.1)) + "System time delay should be approximately 2 seconds" 2.0 delay_system + +let test_edge_cases () = + let tb_zero_rate = + Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:0.0 + in + let _ = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb_zero_rate 2.0 + in + let later_time = Mtime.Span.of_uint64_ns 1_000_000_000_000L in + let available = Token_bucket.peek_with_timestamp later_time tb_zero_rate in + Alcotest.(check (float 0.0)) + "Zero fill rate should not add tokens" 3.0 available ; + + let tb_zero_amount = + Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:1.0 + in + let success = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb_zero_amount 0.0 + in + Alcotest.(check bool) "Consuming zero tokens should succeed" true success ; + + let tb_small = + Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:1.0 + ~fill_rate:0.1 + in + let success_small = + Token_bucket.consume_with_timestamp + (fun () -> Mtime.Span.zero) + tb_small 0.001 + in + Alcotest.(check bool) + "Consuming very small amount should succeed" true success_small ; + + let tb_zero = Token_bucket.create ~burst_size:0.0 ~fill_rate:0.0 in + let success_zero = Token_bucket.consume tb_zero 0.0 in + let success_small = Token_bucket.consume tb_zero 0.001 in + Alcotest.(check bool) "Consuming zero tokens should succeed" true success_zero ; + Alcotest.(check bool) + "Consuming very small amount should fail" false success_small + +let test_consume_quickcheck = + let open 
QCheck.Gen in + let gen_operations = + let gen_operation = + pair (float_range 0.0 1000.0) (int_range 0 1_000_000_000) + in + list_size (int_range 1 50) gen_operation + in + + let fail_peek op_num time_ns time_delta expected current added actual diff = + QCheck.Test.fail_reportf + "Operation %d: peek failed\n\ + \ Time: %d ns (delta: %d ns)\n\ + \ Expected tokens: %.3f (current: %.3f + added: %.3f)\n\ + \ Actual tokens: %.3f\n\ + \ Diff: %.6f" op_num time_ns time_delta expected current added actual + diff + in + + let fail_consume op_num time_ns time_delta amount available success expected + actual diff = + QCheck.Test.fail_reportf + "Operation %d: consume failed\n\ + \ Time: %d ns (delta: %d ns)\n\ + \ Consume amount: %.3f\n\ + \ Available before: %.3f\n\ + \ Success: %b\n\ + \ Expected after: %.3f\n\ + \ Actual after: %.3f\n\ + \ Diff: %.6f" op_num time_ns time_delta amount available success expected + actual diff + in + + let property (burst_size, fill_rate, operations) = + let initial_time = Mtime.Span.of_uint64_ns 0L in + let tb = + Token_bucket.create_with_timestamp initial_time ~burst_size ~fill_rate + in + + let rec check_operations op_num time_ns last_refill_ns current_tokens ops = + match ops with + | [] -> + true + | (consume_amount, time_delta_ns) :: rest -> + let new_time_ns = time_ns + time_delta_ns in + let current_time = + Mtime.Span.of_uint64_ns (Int64.of_int new_time_ns) + in + let time_since_refill_seconds = + float_of_int (new_time_ns - last_refill_ns) *. 1e-9 + in + let tokens_added = time_since_refill_seconds *. fill_rate in + let expected_available = + min burst_size (current_tokens +. tokens_added) + in + let actual_before = + Token_bucket.peek_with_timestamp current_time tb + in + let peek_diff = abs_float (actual_before -. expected_available) in + + if peek_diff >= 0.001 then + fail_peek op_num new_time_ns time_delta_ns expected_available + current_tokens tokens_added actual_before peek_diff + else + let success = + Token_bucket.consume_with_timestamp + (fun () -> current_time) + tb consume_amount + in + let actual_after = + Token_bucket.peek_with_timestamp current_time tb + in + let new_tokens = + if success then + expected_available -. consume_amount + else + expected_available + in + let after_diff = abs_float (actual_after -. 
new_tokens) in + + if after_diff >= 0.001 then + fail_consume op_num new_time_ns time_delta_ns consume_amount + expected_available success new_tokens actual_after after_diff + else + check_operations (op_num + 1) new_time_ns new_time_ns new_tokens + rest + in + + check_operations 1 0 0 burst_size operations + in + + let gen_all = + map3 (fun burst fill ops -> (burst, fill, ops)) pfloat pfloat gen_operations + in + + let arb_all = + QCheck.make + ~print:(fun (burst, fill, ops) -> + let ops_str = + ops + |> List.mapi (fun i (amount, delta) -> + Printf.sprintf " Op %d: consume %.3f at +%d ns" (i + 1) + amount delta + ) + |> String.concat "\n" + in + Printf.sprintf "burst_size=%.3f, fill_rate=%.3f, %d operations:\n%s" + burst fill (List.length ops) ops_str + ) + gen_all + in + + Test.make ~name:"Consume operations maintain correct token count" ~count:100 + arb_all (fun (burst, fill, ops) -> property (burst, fill, ops) + ) + +let test = + [ + ( "Consume removes correct amount" + , `Quick + , test_consume_removes_correct_amount + ) + ; ("Consume more than available", `Quick, test_consume_more_than_available) + ; ( "Consume refills before removing" + , `Quick + , test_consume_refills_before_removing + ) + ; ("Peek respects burst size", `Quick, test_peek_respects_burst_size) + ; ("Concurrent access", `Quick, test_concurrent_access) + ; ("Refill after sleep", `Slow, test_sleep) + ; ("System time versions", `Quick, test_system_time_versions) + ; ("Concurrent system time", `Quick, test_concurrent_system_time) + ; ( "Consume more than available concurrent" + , `Quick + , test_consume_more_than_available_concurrent + ) + ; ("Delay until available", `Quick, test_delay_until_available) + ; ("Edge cases", `Quick, test_edge_cases) + ; QCheck_alcotest.to_alcotest test_consume_quickcheck + ] + +let () = Alcotest.run "Token bucket library" [("Token bucket tests", test)] diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.mli b/ocaml/libs/rate-limit/test/test_token_bucket.mli new file mode 100644 index 00000000000..e69de29bb2d From 3930feba9190248cdef2f4799c50fd73c721079c Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 27 Nov 2025 13:52:04 +0000 Subject: [PATCH 32/59] rate-limit: Implement bucket tables Bucket tables map client identifiers to their token buckets, and are the main data structure for rate limiting. Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 44 ++++++++++++++++++++++++++ ocaml/libs/rate-limit/bucket_table.mli | 26 +++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 ocaml/libs/rate-limit/bucket_table.ml create mode 100644 ocaml/libs/rate-limit/bucket_table.mli diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml new file mode 100644 index 00000000000..f09dad9edbd --- /dev/null +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -0,0 +1,44 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +type t = (string, Token_bucket.t) Hashtbl.t + +let create () = Hashtbl.create 16 + +let add_bucket table ~user_agent ~burst_size ~fill_rate = + let bucket = Token_bucket.create ~burst_size ~fill_rate in + Hashtbl.add table user_agent bucket + +let delete_bucket table ~user_agent = Hashtbl.remove table user_agent + +let try_consume table user_agent amount = + match Hashtbl.find_opt table user_agent with + | None -> + false + | Some bucket -> + Token_bucket.consume bucket amount + +let consume_and_block table user_agent amount = + match Hashtbl.find_opt table user_agent with + | None -> + () + | Some bucket -> + let rec try_consume () = + if Token_bucket.consume bucket amount then + () + else + let wait_time = Token_bucket.delay_until_available bucket amount in + Thread.delay wait_time ; try_consume () + in + try_consume () diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli new file mode 100644 index 00000000000..243748d3c44 --- /dev/null +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -0,0 +1,26 @@ +(* + * Copyright (C) 2025 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type t = (string, Token_bucket.t) Hashtbl.t + +val create : unit -> t + +val add_bucket : + t -> user_agent:string -> burst_size:float -> fill_rate:float -> unit + +val delete_bucket : t -> user_agent:string -> unit + +val try_consume : t -> string -> float -> bool + +val consume_and_block : t -> string -> float -> unit From a3c275f3a421f2bc7887ff0ce06ecb292b9e150c Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 28 Nov 2025 11:26:25 +0000 Subject: [PATCH 33/59] rate-limit: Create bucket table from xapi globs To be replaced with a proper datamodel. Bucket tables are used for mapping requests to their respective token bucket so that they can be rate limited. 
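As a rough sketch of the intended flow (the agent name and the 1.0/0.1
values mirror the "test-rate-limit:1.0:0.1" default added to xapi_globs
below and are illustrative only):

    let table = Rate_limit.Bucket_table.create ()

    let () =
      (* One bucket per configured client, parsed from "client:burst:refill". *)
      Rate_limit.Bucket_table.add_bucket table ~user_agent:"test-rate-limit"
        ~burst_size:1.0 ~fill_rate:0.1

    (* Charge one token per incoming request, blocking until one is
       available; clients without a configured bucket are not throttled. *)
    let throttle user_agent =
      Rate_limit.Bucket_table.consume_and_block table user_agent 1.0
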
Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/dune | 1 + ocaml/xapi/xapi_globs.ml | 8 ++++++++ ocaml/xapi/xapi_rate_limit.ml | 33 +++++++++++++++++++++++++++++++++ ocaml/xapi/xapi_rate_limit.mli | 13 +++++++++++++ 4 files changed, 55 insertions(+) create mode 100644 ocaml/xapi/xapi_rate_limit.ml create mode 100644 ocaml/xapi/xapi_rate_limit.mli diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 8095a5c4bfc..e79fb9d21c0 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -165,6 +165,7 @@ psq ptime ptime.clock.os + rate-limit rpclib.core rpclib.json rpclib.xml diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 161273c83f9..c49de6b5ef7 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1111,6 +1111,8 @@ let python3_path = Constants.python3_path let observer_experimental_components = ref (StringSet.singleton Constants.observer_component_smapi) +let rate_limited_clients = ref ["test-rate-limit:1.0:0.1"] + let pool_recommendations_dir = ref "/etc/xapi.pool-recommendations.d" let disable_webserver = ref false @@ -1782,6 +1784,12 @@ let other_options = ) , "Comma-separated list of experimental observer components" ) + ; ( "rate-limited-clients" + , Arg.String (fun s -> rate_limited_clients := String.split_on_char ',' s) + , (fun () -> String.concat "," !rate_limited_clients) + , "Comma-separated list of rate limited clients and their configurations, \ + in format client:burst:refill" + ) ; ( "disable-webserver" , Arg.Set disable_webserver , (fun () -> string_of_bool !disable_webserver) diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml new file mode 100644 index 00000000000..6beec18a2f7 --- /dev/null +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -0,0 +1,33 @@ +(* + * Copyright (C) 2025 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +module D = Debug.Make (struct let name = "xapi_rate_limit" end) + +let bucket_table = + let table = Rate_limit.Bucket_table.create () in + let configs = !Xapi_globs.rate_limited_clients in + List.iter + (fun s -> + match String.split_on_char ':' s with + | [name; burst_s; refill_s] -> ( + match (float_of_string_opt burst_s, float_of_string_opt refill_s) with + | Some burst, Some refill -> + Rate_limit.Bucket_table.add_bucket table name burst refill + | _ -> + Printf.eprintf "Skipping invalid numeric values in: %s\n" s + ) + | _ -> + Printf.eprintf "Skipping invalid item format: %s\n" s + ) + configs ; + table diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli new file mode 100644 index 00000000000..f4c5c6cf76b --- /dev/null +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -0,0 +1,13 @@ +(* + * Copyright (C) 2025 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) From 85d7bc184cec7ed1054556f48dc12d8fd860a853 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 28 Nov 2025 12:01:23 +0000 Subject: [PATCH 34/59] xapi: Add rate limiting to do_dispatch Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/dune | 3 +++ ocaml/xapi/server_helpers.ml | 3 +++ ocaml/xapi/xapi_rate_limit.ml | 9 +++++---- ocaml/xapi/xapi_rate_limit.mli | 2 ++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index e79fb9d21c0..600503b0e71 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -65,6 +65,7 @@ exnHelper rbac_static xapi_role + xapi_rate_limit xapi_extensions db) (modes best) @@ -83,6 +84,7 @@ threads.posix fmt clock + rate-limit astring stunnel sexplib0 @@ -129,6 +131,7 @@ locking_helpers exnHelper xapi_role + xapi_rate_limit xapi_extensions db)) (libraries diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 0fe9383c737..84481025331 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -183,6 +183,9 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name (* Return task id immediately *) Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) in + Rate_limit.Bucket_table.consume_and_block Xapi_rate_limit.bucket_table + (Option.value http_req.user_agent ~default:"") + 1. ; match sync_ty with | `Sync -> sync () diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index 6beec18a2f7..f150215add5 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -19,10 +19,11 @@ let bucket_table = List.iter (fun s -> match String.split_on_char ':' s with - | [name; burst_s; refill_s] -> ( - match (float_of_string_opt burst_s, float_of_string_opt refill_s) with - | Some burst, Some refill -> - Rate_limit.Bucket_table.add_bucket table name burst refill + | [user_agent; burst_s; fill_s] -> ( + match (float_of_string_opt burst_s, float_of_string_opt fill_s) with + | Some burst_size, Some fill_rate -> + Rate_limit.Bucket_table.add_bucket table ~user_agent ~burst_size + ~fill_rate | _ -> Printf.eprintf "Skipping invalid numeric values in: %s\n" s ) diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli index f4c5c6cf76b..0b297dffbcc 100644 --- a/ocaml/xapi/xapi_rate_limit.mli +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -11,3 +11,5 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
*) + +val bucket_table : Rate_limit.Bucket_table.t From 259816de79ace2025be88c1d53f7eb2b8aee1d8f Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 1 Dec 2025 10:15:02 +0000 Subject: [PATCH 35/59] xapi rate limiting: Add logging Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 7 +++++-- ocaml/libs/rate-limit/bucket_table.mli | 6 ++++-- ocaml/xapi/server_helpers.ml | 21 ++++++++++++++++++--- ocaml/xapi/xapi_rate_limit.ml | 4 ++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index f09dad9edbd..d66ca353bab 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -22,14 +22,17 @@ let add_bucket table ~user_agent ~burst_size ~fill_rate = let delete_bucket table ~user_agent = Hashtbl.remove table user_agent -let try_consume table user_agent amount = +let try_consume table ~user_agent amount = match Hashtbl.find_opt table user_agent with | None -> false | Some bucket -> Token_bucket.consume bucket amount -let consume_and_block table user_agent amount = +let peek table ~user_agent = + Option.map Token_bucket.peek (Hashtbl.find_opt table user_agent) + +let consume_and_block table ~user_agent amount = match Hashtbl.find_opt table user_agent with | None -> () diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli index 243748d3c44..fdb6fd35332 100644 --- a/ocaml/libs/rate-limit/bucket_table.mli +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -19,8 +19,10 @@ val create : unit -> t val add_bucket : t -> user_agent:string -> burst_size:float -> fill_rate:float -> unit +val peek : t -> user_agent:string -> float option + val delete_bucket : t -> user_agent:string -> unit -val try_consume : t -> string -> float -> bool +val try_consume : t -> user_agent:string -> float -> bool -val consume_and_block : t -> string -> float -> unit +val consume_and_block : t -> user_agent:string -> float -> unit diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 84481025331..5ac0127e209 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -183,9 +183,24 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name (* Return task id immediately *) Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) in - Rate_limit.Bucket_table.consume_and_block Xapi_rate_limit.bucket_table - (Option.value http_req.user_agent ~default:"") - 1. ; + let user_agent_option = http_req.user_agent in + let peek_result = + Option.bind user_agent_option (fun user_agent -> + Rate_limit.Bucket_table.peek Xapi_rate_limit.bucket_table ~user_agent + ) + in + ( match user_agent_option with + | Some user_agent -> + D.debug + "Bucket table: Expecting to consume %f tokens from user_agent %s" + (Option.value peek_result ~default:0.) + user_agent ; + Rate_limit.Bucket_table.consume_and_block Xapi_rate_limit.bucket_table + ~user_agent:(Option.value http_req.user_agent ~default:"") + 1. 
+ | None -> + D.debug "Bucket table: user_agent was None, not throttling" + ) ; match sync_ty with | `Sync -> sync () diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index f150215add5..dae240f2862 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -22,6 +22,10 @@ let bucket_table = | [user_agent; burst_s; fill_s] -> ( match (float_of_string_opt burst_s, float_of_string_opt fill_s) with | Some burst_size, Some fill_rate -> + D.debug + "Adding user agent %s to bucket table with burst size %f and \ + fill rate %f" + user_agent burst_size fill_rate ; Rate_limit.Bucket_table.add_bucket table ~user_agent ~burst_size ~fill_rate | _ -> From a96c26ef01ee5dfd6e64cdc2b57fb2126da9cf21 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 1 Dec 2025 11:57:05 +0000 Subject: [PATCH 36/59] rate_limit: Add rate limiter to xapi initialisation Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/xapi.ml | 4 ++++ ocaml/xapi/xapi_rate_limit.ml | 16 ++++++++-------- ocaml/xapi/xapi_rate_limit.mli | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 785950c384e..c45ba121528 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1117,6 +1117,10 @@ let server_init () = , fun () -> List.iter Xapi_http.add_handler master_only_http_handlers ) + ; ( "Registering rate limits" + , [Startup.OnlyMaster] + , fun () -> Xapi_rate_limit.register_xapi_globs () + ) ; ( "Listening unix socket" , [] , fun () -> listen_unix_socket Xapi_globs.unix_domain_socket diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index dae240f2862..b86f6c0457d 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -13,8 +13,9 @@ *) module D = Debug.Make (struct let name = "xapi_rate_limit" end) -let bucket_table = - let table = Rate_limit.Bucket_table.create () in +let bucket_table = Rate_limit.Bucket_table.create () + +let register_xapi_globs () = let configs = !Xapi_globs.rate_limited_clients in List.iter (fun s -> @@ -26,13 +27,12 @@ let bucket_table = "Adding user agent %s to bucket table with burst size %f and \ fill rate %f" user_agent burst_size fill_rate ; - Rate_limit.Bucket_table.add_bucket table ~user_agent ~burst_size - ~fill_rate + Rate_limit.Bucket_table.add_bucket bucket_table ~user_agent + ~burst_size ~fill_rate | _ -> - Printf.eprintf "Skipping invalid numeric values in: %s\n" s + D.debug "Skipping invalid numeric values in: %s\n" s ) | _ -> - Printf.eprintf "Skipping invalid item format: %s\n" s + D.debug "Skipping invalid item format: %s\n" s ) - configs ; - table + configs diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli index 0b297dffbcc..f9b48bd83bb 100644 --- a/ocaml/xapi/xapi_rate_limit.mli +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -13,3 +13,5 @@ *) val bucket_table : Rate_limit.Bucket_table.t + +val register_xapi_globs : unit -> unit From 06d4d7d51cee952b02944c87a784a4a59be54738 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 1 Dec 2025 16:07:32 +0000 Subject: [PATCH 37/59] Rate limiting: Improve token_bucket documentation Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/token_bucket.mli | 78 +++++++++++++++++--------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli index 1379fa64c58..783791a6917 100644 --- a/ocaml/libs/rate-limit/token_bucket.mli +++ 
b/ocaml/libs/rate-limit/token_bucket.mli @@ -12,18 +12,63 @@ * GNU Lesser General Public License for more details. *) +(** This module implements a classic token-bucket rate limiter. Token buckets + contain tokens that are refilled over time, and can be consumed in a + thread-safe way. A token bucket accumulates [fill_rate] tokens per second, + up to [burst_size]. Consumers may take tokens (if available), or query when + enough tokens will become available. + + Token buckets implement rate limiting by allowing operations to proceed + only when sufficient tokens are available - otherwise, the operations can + be delayed until enough tokens are available. + + To avoid doing unnecessary work to refill the bucket, token amounts are + only updated when a consume operation is carried out. The buckets keep a + last_refill timestamp which is updated on consume in tandem with the token + counts, and informs how many tokens should be added by the bucket refill. + + We include versions of functions that take a timestamp as a parameter for + testing purposes only - consumers of this library should use the + timestamp-less versions. +*) + type t -val create_with_timestamp : - Mtime.span -> burst_size:float -> fill_rate:float -> t -(** Create token bucket with given parameters and supplied inital timestamp - @param timestamp Initial timestamp +val create : burst_size:float -> fill_rate:float -> t +(** Create token bucket with given parameters. @param burst_size Maximum number of tokens that can fit in the bucket @param fill_rate Number of tokens added to the bucket per second *) -val create : burst_size:float -> fill_rate:float -> t -(** Create token bucket with given parameters. +val peek : t -> float +(** Retrieve current token amount + @param tb Token bucket + @return Amount of tokens in the token bucket + *) + +val consume : t -> float -> bool +(** Consume tokens from the bucket in a thread-safe manner. + @param tb Token bucket + @param amount How many tokens to consume + @return Whether the tokens were successfully consumed + *) + +val delay_until_available : t -> float -> float +(** Get number of seconds that need to pass until bucket is expected to have + enough tokens to fulfil the request + @param tb Token bucket + @param amount How many tokens we want to consume + @return Number of seconds until tokens are available +*) + +(**/**) + +(* Fuctions accepting a timestamp are meant for testing only *) + +val create_with_timestamp : + Mtime.span -> burst_size:float -> fill_rate:float -> t +(** Create token bucket with given parameters and supplied inital timestamp + @param timestamp Initial timestamp @param burst_size Maximum number of tokens that can fit in the bucket @param fill_rate Number of tokens added to the bucket per second *) @@ -36,12 +81,6 @@ val peek_with_timestamp : Mtime.span -> t -> float @return Amount of tokens in the token bucket *) -val peek : t -> float -(** Retrieve current token amount - @param tb Token bucket - @return Amount of tokens in the token bucket - *) - val consume_with_timestamp : (unit -> Mtime.span) -> t -> float -> bool (** Consume tokens from the bucket in a thread-safe manner, using supplied function for obtaining the current time @@ -51,13 +90,6 @@ val consume_with_timestamp : (unit -> Mtime.span) -> t -> float -> bool @return Whether the tokens were successfully consumed *) -val consume : t -> float -> bool -(** Consume tokens from the bucket in a thread-safe manner. 
- @param tb Token bucket - @param amount How many tokens to consume - @return Whether the tokens were successfully consumed - *) - val delay_until_available_timestamp : Mtime.span -> t -> float -> float (** Get number of seconds that need to pass until bucket is expected to have enough tokens to fulfil the request @@ -67,10 +99,4 @@ val delay_until_available_timestamp : Mtime.span -> t -> float -> float @return Number of seconds until tokens are available *) -val delay_until_available : t -> float -> float -(** Get number of seconds that need to pass until bucket is expected to have - enough tokens to fulfil the request - @param tb Token bucket - @param amount How many tokens we want to consume - @return Number of seconds until tokens are available -*) +(**/**) From 957307e6a87bd027a823d615890a7dd2e6c3bf08 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 1 Dec 2025 16:58:16 +0000 Subject: [PATCH 38/59] Rate limiting: token buckets with zero or negative fill rate fail Zero or negative rate limits can cause issues in the behaviour of rate limiting. In particular, zero fill rate leads to a division by zero in time calculations. Rather than account for this, we forbid the creation of token buckets with a bad fill rate by returning None. Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 9 +- ocaml/libs/rate-limit/bucket_table.mli | 14 ++- .../libs/rate-limit/test/test_token_bucket.ml | 117 ++++++++++-------- ocaml/libs/rate-limit/token_bucket.ml | 18 +-- ocaml/libs/rate-limit/token_bucket.mli | 4 +- ocaml/xapi/xapi_rate_limit.ml | 11 +- 6 files changed, 107 insertions(+), 66 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index d66ca353bab..b6b6e985056 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -17,8 +17,13 @@ type t = (string, Token_bucket.t) Hashtbl.t let create () = Hashtbl.create 16 let add_bucket table ~user_agent ~burst_size ~fill_rate = - let bucket = Token_bucket.create ~burst_size ~fill_rate in - Hashtbl.add table user_agent bucket + let bucket_option = Token_bucket.create ~burst_size ~fill_rate in + match bucket_option with + | Some bucket -> + Hashtbl.add table user_agent bucket ; + true + | None -> + false let delete_bucket table ~user_agent = Hashtbl.remove table user_agent diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli index fdb6fd35332..eb29b9c54fc 100644 --- a/ocaml/libs/rate-limit/bucket_table.mli +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -12,17 +12,29 @@ * GNU Lesser General Public License for more details. *) +(** Hash table mapping client identifiers to their token buckets for rate limiting. *) type t = (string, Token_bucket.t) Hashtbl.t val create : unit -> t +(** [create ()] creates a new empty bucket table. *) val add_bucket : - t -> user_agent:string -> burst_size:float -> fill_rate:float -> unit + t -> user_agent:string -> burst_size:float -> fill_rate:float -> bool +(** [add_bucket table ~user_agent ~burst_size ~fill_rate] adds a token bucket + for the given user agent. Returns [false] if a bucket already exists, or if + the bucket configuration is invalid, e.g. negative fill rate. *) val peek : t -> user_agent:string -> float option +(** [peek table ~user_agent] returns the current token count for the user agent, + or [None] if no bucket exists. 
*) val delete_bucket : t -> user_agent:string -> unit +(** [delete_bucket table ~user_agent] removes the bucket for the user agent. *) val try_consume : t -> user_agent:string -> float -> bool +(** [try_consume table ~user_agent amount] attempts to consume tokens. + Returns [true] on success, [false] if insufficient tokens. *) val consume_and_block : t -> user_agent:string -> float -> unit +(** [consume_and_block table ~user_agent amount] consumes tokens, blocking + until sufficient tokens are available. *) diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml index e0cb8d69dc2..375be7c9b15 100644 --- a/ocaml/libs/rate-limit/test/test_token_bucket.ml +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -2,11 +2,22 @@ open Thread open Rate_limit open QCheck +let test_bad_fill_rate () = + let tb_zero = Token_bucket.create ~burst_size:1.0 ~fill_rate:0.0 in + Alcotest.(check bool) + "Creating a token bucket with 0 fill rate should fail" true (tb_zero = None) ; + let tb_negative = Token_bucket.create ~burst_size:1.0 ~fill_rate:~-.1.0 in + Alcotest.(check bool) + "Creating a token bucket with negative fill rate should fail" true + (tb_negative = None) + let test_consume_removes_correct_amount () = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 - ~fill_rate:2.0 + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) in Alcotest.(check (float 0.0)) @@ -25,8 +36,10 @@ let test_consume_removes_correct_amount () = let test_consume_more_than_available () = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size:5.0 - ~fill_rate:1.0 + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:5.0 + ~fill_rate:1.0 + ) in let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 4.0 in @@ -43,8 +56,10 @@ let test_consume_more_than_available () = let test_consume_refills_before_removing () = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 - ~fill_rate:2.0 + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) in let first_consume = @@ -67,8 +82,10 @@ let test_consume_refills_before_removing () = let test_peek_respects_burst_size () = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 - ~fill_rate:5.0 + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:5.0 + ) in let _ = Token_bucket.consume_with_timestamp (fun () -> initial_time) tb 8.0 in @@ -80,8 +97,10 @@ let test_peek_respects_burst_size () = let test_concurrent_access () = let tb = - Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:15.0 - ~fill_rate:0.0 + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:15.0 + ~fill_rate:0.01 + ) in let threads = Array.init 10 (fun _ -> @@ -101,14 +120,14 @@ let test_concurrent_access () = 5.0 let test_sleep () = - let tb = Token_bucket.create ~burst_size:20.0 ~fill_rate:5.0 in + let tb = Option.get (Token_bucket.create ~burst_size:20.0 ~fill_rate:5.0) in let _ = Token_bucket.consume tb 10.0 in Thread.delay 1.0 ; Alcotest.(check (float 0.2)) "Sleep 1 should refill token bucket by fill_rate" 15.0 (Token_bucket.peek tb) let test_system_time_versions () = - let tb = 
Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0 in + let tb = Option.get (Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0) in let initial_peek = Token_bucket.peek tb in Alcotest.(check (float 0.01)) @@ -122,7 +141,7 @@ let test_system_time_versions () = "After consume, should have 7 tokens" 7.0 after_consume_peek let test_concurrent_system_time () = - let tb = Token_bucket.create ~burst_size:100.0 ~fill_rate:10.0 in + let tb = Option.get (Token_bucket.create ~burst_size:100.0 ~fill_rate:10.0) in let num_threads = 20 in let consume_per_thread = 3 in @@ -149,8 +168,10 @@ let test_concurrent_system_time () = let test_consume_more_than_available_concurrent () = let tb = - Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 - ~fill_rate:0.0 + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:0.1 + ) in let num_threads = 10 in let consume_per_thread = 1 in @@ -180,15 +201,17 @@ let test_consume_more_than_available_concurrent () = Alcotest.(check int) "Only 5 consumptions should succeed" 5 !successful_consumes ; - Alcotest.(check (float 0.0)) + Alcotest.(check (float 0.1)) "Bucket should be empty after consumptions" 0.0 (Token_bucket.peek_with_timestamp Mtime.Span.zero tb) let test_delay_until_available () = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 - ~fill_rate:2.0 + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size:10.0 + ~fill_rate:2.0 + ) in let _ = @@ -201,7 +224,9 @@ let test_delay_until_available () = Alcotest.(check (float 0.01)) "Delay for 4 tokens at 2 tokens/sec should be 2 seconds" 2.0 delay ; - let tb_fresh = Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0 in + let tb_fresh = + Option.get (Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0) + in let _ = Token_bucket.consume tb_fresh 10.0 in let delay_system = Token_bucket.delay_until_available tb_fresh 4.0 in @@ -209,34 +234,22 @@ let test_delay_until_available () = "System time delay should be approximately 2 seconds" 2.0 delay_system let test_edge_cases () = - let tb_zero_rate = - Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 - ~fill_rate:0.0 - in - let _ = - Token_bucket.consume_with_timestamp - (fun () -> Mtime.Span.zero) - tb_zero_rate 2.0 - in - let later_time = Mtime.Span.of_uint64_ns 1_000_000_000_000L in - let available = Token_bucket.peek_with_timestamp later_time tb_zero_rate in - Alcotest.(check (float 0.0)) - "Zero fill rate should not add tokens" 3.0 available ; - - let tb_zero_amount = - Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 - ~fill_rate:1.0 + let tb = + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:5.0 + ~fill_rate:1.0 + ) in let success = - Token_bucket.consume_with_timestamp - (fun () -> Mtime.Span.zero) - tb_zero_amount 0.0 + Token_bucket.consume_with_timestamp (fun () -> Mtime.Span.zero) tb 0.0 in Alcotest.(check bool) "Consuming zero tokens should succeed" true success ; let tb_small = - Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:1.0 - ~fill_rate:0.1 + Option.get + (Token_bucket.create_with_timestamp Mtime.Span.zero ~burst_size:1.0 + ~fill_rate:0.1 + ) in let success_small = Token_bucket.consume_with_timestamp @@ -244,14 +257,7 @@ let test_edge_cases () = tb_small 0.001 in Alcotest.(check bool) - "Consuming very small amount should succeed" true success_small ; - - let tb_zero = Token_bucket.create ~burst_size:0.0 ~fill_rate:0.0 in - let 
success_zero = Token_bucket.consume tb_zero 0.0 in - let success_small = Token_bucket.consume tb_zero 0.001 in - Alcotest.(check bool) "Consuming zero tokens should succeed" true success_zero ; - Alcotest.(check bool) - "Consuming very small amount should fail" false success_small + "Consuming very small amount should succeed" true success_small let test_consume_quickcheck = let open QCheck.Gen in @@ -289,7 +295,8 @@ let test_consume_quickcheck = let property (burst_size, fill_rate, operations) = let initial_time = Mtime.Span.of_uint64_ns 0L in let tb = - Token_bucket.create_with_timestamp initial_time ~burst_size ~fill_rate + Option.get + (Token_bucket.create_with_timestamp initial_time ~burst_size ~fill_rate) in let rec check_operations op_num time_ns last_refill_ns current_tokens ops = @@ -345,7 +352,9 @@ let test_consume_quickcheck = in let gen_all = - map3 (fun burst fill ops -> (burst, fill, ops)) pfloat pfloat gen_operations + map3 + (fun burst fill ops -> (burst, fill, ops)) + pfloat (float_range 1e-9 1e9) gen_operations in let arb_all = @@ -371,7 +380,11 @@ let test_consume_quickcheck = let test = [ - ( "Consume removes correct amount" + ( "A bucket with zero or negative fill rate cannot be created" + , `Quick + , test_bad_fill_rate + ) + ; ( "Consume removes correct amount" , `Quick , test_consume_removes_correct_amount ) diff --git a/ocaml/libs/rate-limit/token_bucket.ml b/ocaml/libs/rate-limit/token_bucket.ml index 8ccc047b306..81e2cfff93e 100644 --- a/ocaml/libs/rate-limit/token_bucket.ml +++ b/ocaml/libs/rate-limit/token_bucket.ml @@ -23,13 +23,17 @@ type t = { } let create_with_timestamp timestamp ~burst_size ~fill_rate = - { - burst_size - ; fill_rate - ; tokens= burst_size - ; last_refill= timestamp - ; mutex= Mutex.create () - } + if fill_rate <= 0. then + None + else + Some + { + burst_size + ; fill_rate + ; tokens= burst_size + ; last_refill= timestamp + ; mutex= Mutex.create () + } let create = create_with_timestamp (Mtime_clock.elapsed ()) diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli index 783791a6917..58bbe8f0138 100644 --- a/ocaml/libs/rate-limit/token_bucket.mli +++ b/ocaml/libs/rate-limit/token_bucket.mli @@ -34,7 +34,7 @@ type t -val create : burst_size:float -> fill_rate:float -> t +val create : burst_size:float -> fill_rate:float -> t option (** Create token bucket with given parameters. 
@param burst_size Maximum number of tokens that can fit in the bucket @param fill_rate Number of tokens added to the bucket per second @@ -66,7 +66,7 @@ val delay_until_available : t -> float -> float (* Fuctions accepting a timestamp are meant for testing only *) val create_with_timestamp : - Mtime.span -> burst_size:float -> fill_rate:float -> t + Mtime.span -> burst_size:float -> fill_rate:float -> t option (** Create token bucket with given parameters and supplied inital timestamp @param timestamp Initial timestamp @param burst_size Maximum number of tokens that can fit in the bucket diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index b86f6c0457d..63addecf153 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -27,8 +27,15 @@ let register_xapi_globs () = "Adding user agent %s to bucket table with burst size %f and \ fill rate %f" user_agent burst_size fill_rate ; - Rate_limit.Bucket_table.add_bucket bucket_table ~user_agent - ~burst_size ~fill_rate + if + not + (Rate_limit.Bucket_table.add_bucket bucket_table ~user_agent + ~burst_size ~fill_rate + ) + then + D.error + "Bucket creation failed for user agent %s: invalid fill rate %f" + user_agent fill_rate | _ -> D.debug "Skipping invalid numeric values in: %s\n" s ) From 070f47b00a8c3cb1c0369e5df08a9e4579628811 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 2 Dec 2025 10:15:08 +0000 Subject: [PATCH 39/59] rate-limit: Write unit tests for bucket table Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/test/dune | 6 +- .../libs/rate-limit/test/test_bucket_table.ml | 174 ++++++++++++++++++ .../rate-limit/test/test_bucket_table.mli | 0 .../libs/rate-limit/test/test_token_bucket.ml | 5 +- 4 files changed, 179 insertions(+), 6 deletions(-) create mode 100644 ocaml/libs/rate-limit/test/test_bucket_table.ml create mode 100644 ocaml/libs/rate-limit/test/test_bucket_table.mli diff --git a/ocaml/libs/rate-limit/test/dune b/ocaml/libs/rate-limit/test/dune index 719cb056013..ca4afbe782a 100644 --- a/ocaml/libs/rate-limit/test/dune +++ b/ocaml/libs/rate-limit/test/dune @@ -1,4 +1,4 @@ -(test - (name test_token_bucket) +(tests + (names test_token_bucket test_bucket_table) (package rate-limit) - (libraries rate_limit alcotest qcheck-core qcheck-alcotest mtime mtime.clock.os fmt xapi-log threads.posix)) + (libraries rate_limit alcotest qcheck-core qcheck-alcotest mtime mtime.clock.os fmt xapi-log threads.posix)) \ No newline at end of file diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml new file mode 100644 index 00000000000..691476ad9a1 --- /dev/null +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -0,0 +1,174 @@ +open Rate_limit + +let test_create () = + let table = Bucket_table.create () in + Alcotest.(check (option (float 0.0))) + "Empty table returns None for peek" None + (Bucket_table.peek table ~user_agent:"test") + +let test_add_bucket () = + let table = Bucket_table.create () in + let success = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + Alcotest.(check bool) "Adding valid bucket should succeed" true success ; + Alcotest.(check (option (float 0.1))) + "Peek should return burst_size" (Some 10.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_add_bucket_invalid () = + let table = Bucket_table.create () in + let success = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:0.0 + in + 
Alcotest.(check bool) + "Adding bucket with zero fill rate should fail" false success ; + let success_neg = + Bucket_table.add_bucket table ~user_agent:"agent2" ~burst_size:10.0 + ~fill_rate:(-1.0) + in + Alcotest.(check bool) + "Adding bucket with negative fill rate should fail" false success_neg + +let test_delete_bucket () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + Alcotest.(check (option (float 0.1))) + "Bucket exists before delete" (Some 10.0) + (Bucket_table.peek table ~user_agent:"agent1") ; + Bucket_table.delete_bucket table ~user_agent:"agent1" ; + Alcotest.(check (option (float 0.0))) + "Bucket removed after delete" None + (Bucket_table.peek table ~user_agent:"agent1") + +let test_delete_nonexistent () = + let table = Bucket_table.create () in + Bucket_table.delete_bucket table ~user_agent:"nonexistent" ; + Alcotest.(check pass) "Deleting nonexistent bucket should not raise" () () + +let test_try_consume () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + let success = Bucket_table.try_consume table ~user_agent:"agent1" 3.0 in + Alcotest.(check bool) "Consuming available tokens should succeed" true success ; + Alcotest.(check (option (float 0.1))) + "Tokens reduced after consume" (Some 7.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_try_consume_insufficient () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:1.0 + in + let success = Bucket_table.try_consume table ~user_agent:"agent1" 10.0 in + Alcotest.(check bool) + "Consuming more than available should fail" false success ; + Alcotest.(check (option (float 0.1))) + "Tokens unchanged after failed consume" (Some 5.0) + (Bucket_table.peek table ~user_agent:"agent1") + +let test_try_consume_nonexistent () = + let table = Bucket_table.create () in + let success = Bucket_table.try_consume table ~user_agent:"nonexistent" 1.0 in + Alcotest.(check bool) + "Consuming from nonexistent bucket should fail" false success + +let test_peek_nonexistent () = + let table = Bucket_table.create () in + Alcotest.(check (option (float 0.0))) + "Peek nonexistent bucket returns None" None + (Bucket_table.peek table ~user_agent:"nonexistent") + +let test_multiple_agents () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:2.0 + in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent2" ~burst_size:20.0 + ~fill_rate:5.0 + in + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + Alcotest.(check (option (float 0.1))) + "Agent1 tokens reduced" (Some 5.0) + (Bucket_table.peek table ~user_agent:"agent1") ; + Alcotest.(check (option (float 0.1))) + "Agent2 tokens unchanged" (Some 20.0) + (Bucket_table.peek table ~user_agent:"agent2") + +let test_consume_and_block () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:10.0 + in + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 10.0 in + let start_counter = Mtime_clock.counter () in + Bucket_table.consume_and_block table ~user_agent:"agent1" 5.0 ; + let elapsed_span = Mtime_clock.count start_counter in + let elapsed_seconds = Mtime.Span.to_float_ns elapsed_span *. 
1e-9 in + Alcotest.(check (float 0.1)) + "consume_and_block should wait for tokens" elapsed_seconds 0.5 + +let test_consume_and_block_nonexistent () = + let table = Bucket_table.create () in + Bucket_table.consume_and_block table ~user_agent:"nonexistent" 1.0 ; + Alcotest.(check pass) + "consume_and_block on nonexistent bucket should not block" () () + +let test_concurrent_access () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:100.0 + ~fill_rate:0.01 + in + let successful_consumes = ref 0 in + let counter_mutex = Mutex.create () in + let threads = + Array.init 20 (fun _ -> + Thread.create + (fun () -> + let success = + Bucket_table.try_consume table ~user_agent:"agent1" 5.0 + in + if success then ( + Mutex.lock counter_mutex ; + incr successful_consumes ; + Mutex.unlock counter_mutex + ) + ) + () + ) + in + Array.iter Thread.join threads ; + Alcotest.(check int) + "Exactly 20 consumes should succeed" 20 !successful_consumes + +let test = + [ + ("Create empty table", `Quick, test_create) + ; ("Add valid bucket", `Quick, test_add_bucket) + ; ("Add invalid bucket", `Quick, test_add_bucket_invalid) + ; ("Delete bucket", `Quick, test_delete_bucket) + ; ("Delete nonexistent bucket", `Quick, test_delete_nonexistent) + ; ("Try consume", `Quick, test_try_consume) + ; ("Try consume insufficient", `Quick, test_try_consume_insufficient) + ; ("Try consume nonexistent", `Quick, test_try_consume_nonexistent) + ; ("Peek nonexistent", `Quick, test_peek_nonexistent) + ; ("Multiple agents", `Quick, test_multiple_agents) + ; ("Consume and block", `Slow, test_consume_and_block) + ; ("Consume and block nonexistent", `Quick, test_consume_and_block_nonexistent) + ; ("Concurrent access", `Quick, test_concurrent_access) + ] + +let () = Alcotest.run "Bucket table library" [("Bucket table tests", test)] diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.mli b/ocaml/libs/rate-limit/test/test_bucket_table.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml index 375be7c9b15..993be30e23f 100644 --- a/ocaml/libs/rate-limit/test/test_token_bucket.ml +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -1,6 +1,5 @@ open Thread open Rate_limit -open QCheck let test_bad_fill_rate () = let tb_zero = Token_bucket.create ~burst_size:1.0 ~fill_rate:0.0 in @@ -374,8 +373,8 @@ let test_consume_quickcheck = gen_all in - Test.make ~name:"Consume operations maintain correct token count" ~count:100 - arb_all (fun (burst, fill, ops) -> property (burst, fill, ops) + QCheck.Test.make ~name:"Consume operations maintain correct token count" + ~count:100 arb_all (fun (burst, fill, ops) -> property (burst, fill, ops) ) let test = From 276bc379a242f2ea26aed2c5a8990256e6efbd44 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 2 Dec 2025 14:36:48 +0000 Subject: [PATCH 40/59] rate-limit: Minor fixes to bucket table Make token bucket type abstract to hide Hashtbl.t Use `replace` rather than `add` for adding a new bucket Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 2 +- ocaml/libs/rate-limit/bucket_table.mli | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index b6b6e985056..1ef7a9d6fa4 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -20,7 +20,7 @@ 
let add_bucket table ~user_agent ~burst_size ~fill_rate = let bucket_option = Token_bucket.create ~burst_size ~fill_rate in match bucket_option with | Some bucket -> - Hashtbl.add table user_agent bucket ; + Hashtbl.replace table user_agent bucket ; true | None -> false diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli index eb29b9c54fc..8b3bdc26f4f 100644 --- a/ocaml/libs/rate-limit/bucket_table.mli +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -13,7 +13,7 @@ *) (** Hash table mapping client identifiers to their token buckets for rate limiting. *) -type t = (string, Token_bucket.t) Hashtbl.t +type t val create : unit -> t (** [create ()] creates a new empty bucket table. *) From 0d6fab95a4021a423421929620cc612ec1a9b75b Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 4 Dec 2025 10:59:04 +0000 Subject: [PATCH 41/59] rate-limit: Add readers-writer lock to bucket table Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 84 ++++++-- .../libs/rate-limit/test/test_bucket_table.ml | 194 ++++++++++++++++-- 2 files changed, 237 insertions(+), 41 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index 1ef7a9d6fa4..2160d598235 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -12,33 +12,75 @@ * GNU Lesser General Public License for more details. *) -type t = (string, Token_bucket.t) Hashtbl.t +type t = { + table: (string, Token_bucket.t) Hashtbl.t + ; mutable readers: int + ; reader_count: Mutex.t (* protects readers count *) + ; resource: Mutex.t (* held collectively by readers, exclusively by writers *) +} -let create () = Hashtbl.create 16 +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -let add_bucket table ~user_agent ~burst_size ~fill_rate = - let bucket_option = Token_bucket.create ~burst_size ~fill_rate in - match bucket_option with - | Some bucket -> - Hashtbl.replace table user_agent bucket ; - true - | None -> - false +let with_read_lock t f = + with_lock t.reader_count (fun () -> + t.readers <- t.readers + 1 ; + if t.readers = 1 then Mutex.lock t.resource + ) ; + Fun.protect f ~finally:(fun () -> + with_lock t.reader_count (fun () -> + t.readers <- t.readers - 1 ; + if t.readers = 0 then Mutex.unlock t.resource + ) + ) -let delete_bucket table ~user_agent = Hashtbl.remove table user_agent +let with_write_lock t f = with_lock t.resource f -let try_consume table ~user_agent amount = - match Hashtbl.find_opt table user_agent with - | None -> - false - | Some bucket -> - Token_bucket.consume bucket amount +let create () = + { + table= Hashtbl.create 10 + ; readers= 0 + ; reader_count= Mutex.create () + ; resource= Mutex.create () + } + +(* TODO: Indicate failure reason - did we get invalid config or try to add an + already present user_agent? 
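+   One option might be to return a result, e.g. (unit, [`Duplicate_client | `Invalid_config]) result,
+   so callers can tell the two failures apart; these variant names are only illustrative.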
*) +let add_bucket t ~user_agent ~burst_size ~fill_rate = + with_write_lock t (fun () -> + if Hashtbl.mem t.table user_agent then + false + else + match Token_bucket.create ~burst_size ~fill_rate with + | Some bucket -> + Hashtbl.add t.table user_agent bucket ; + true + | None -> + false + ) + +let delete_bucket t ~user_agent = + with_write_lock t (fun () -> Hashtbl.remove t.table user_agent) + +let try_consume t ~user_agent amount = + with_read_lock t (fun () -> + match Hashtbl.find_opt t.table user_agent with + | None -> + false + | Some bucket -> + Token_bucket.consume bucket amount + ) -let peek table ~user_agent = - Option.map Token_bucket.peek (Hashtbl.find_opt table user_agent) +let peek t ~user_agent = + with_read_lock t (fun () -> + Option.map Token_bucket.peek (Hashtbl.find_opt t.table user_agent) + ) -let consume_and_block table ~user_agent amount = - match Hashtbl.find_opt table user_agent with +(* TODO this has fairness issues - fix with queue or similar *) +let consume_and_block t ~user_agent amount = + let bucket_opt = + with_read_lock t (fun () -> Hashtbl.find_opt t.table user_agent) + in + match bucket_opt with | None -> () | Some bucket -> diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml index 691476ad9a1..70921ae0435 100644 --- a/ocaml/libs/rate-limit/test/test_bucket_table.ml +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -126,33 +126,185 @@ let test_consume_and_block_nonexistent () = Alcotest.(check pass) "consume_and_block on nonexistent bucket should not block" () () -let test_concurrent_access () = +let test_add_same_key_race () = + (* Test the check-then-act race in add_bucket. + add_bucket does: if not mem then add. Without locking, multiple threads + could all pass the mem check and try to add, but only one should succeed. + Note: OCaml 4's GIL makes races hard to trigger, but this test verifies + the invariant holds under concurrent access and would catch races if the + GIL is released at allocation points within the critical section. *) + let iterations = 500 in + let threads_per_iter = 10 in + let failures = ref 0 in + let failures_mutex = Mutex.create () in + for _ = 1 to iterations do + let table = Bucket_table.create () in + let success_count = ref 0 in + let count_mutex = Mutex.create () in + let barrier = ref 0 in + let barrier_mutex = Mutex.create () in + let threads = + Array.init threads_per_iter (fun _ -> + Thread.create + (fun () -> + (* Increment barrier and wait for all threads *) + Mutex.lock barrier_mutex ; + incr barrier ; + Mutex.unlock barrier_mutex ; + while + Mutex.lock barrier_mutex ; + let b = !barrier in + Mutex.unlock barrier_mutex ; b < threads_per_iter + do + Thread.yield () + done ; + (* All threads try to add the same key simultaneously *) + let success = + Bucket_table.add_bucket table ~user_agent:"contested_key" + ~burst_size:10.0 ~fill_rate:1.0 + in + if success then ( + Mutex.lock count_mutex ; + incr success_count ; + Mutex.unlock count_mutex + ) + ) + () + ) + in + Array.iter Thread.join threads ; + (* Exactly one thread should succeed in adding the key *) + if !success_count <> 1 then ( + Mutex.lock failures_mutex ; + incr failures ; + Mutex.unlock failures_mutex + ) + done ; + Alcotest.(check int) + "Exactly one add should succeed for same key (across all iterations)" 0 + !failures + +let test_concurrent_add_delete_stress () = + (* Stress test: rapidly add and delete entries. + Without proper locking, hashtable can get corrupted. 
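+     With the readers-writer lock added here, add_bucket and delete_bucket take the
+     write lock while peek and try_consume take the read lock, so readers should
+     never observe a partially updated table.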
*) let table = Bucket_table.create () in - let _ = - Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:100.0 - ~fill_rate:0.01 + let iterations = 1000 in + let num_keys = 10 in + let errors = ref 0 in + let errors_mutex = Mutex.create () in + let add_threads = + Array.init 5 (fun t -> + Thread.create + (fun () -> + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + let _ = + Bucket_table.add_bucket table ~user_agent:key ~burst_size:10.0 + ~fill_rate:1.0 + in + () + done + ) + () + ) in - let successful_consumes = ref 0 in - let counter_mutex = Mutex.create () in - let threads = - Array.init 20 (fun _ -> + let delete_threads = + Array.init 5 (fun t -> Thread.create (fun () -> - let success = - Bucket_table.try_consume table ~user_agent:"agent1" 5.0 - in - if success then ( - Mutex.lock counter_mutex ; - incr successful_consumes ; - Mutex.unlock counter_mutex - ) + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + Bucket_table.delete_bucket table ~user_agent:key + done ) () ) in - Array.iter Thread.join threads ; - Alcotest.(check int) - "Exactly 20 consumes should succeed" 20 !successful_consumes + let read_threads = + Array.init 5 (fun t -> + Thread.create + (fun () -> + for i = 0 to iterations - 1 do + let key = + Printf.sprintf "key%d" (((t * iterations) + i) mod num_keys) + in + (* This should never crash, even if key doesn't exist *) + try + let _ = Bucket_table.peek table ~user_agent:key in + () + with _ -> + Mutex.lock errors_mutex ; + incr errors ; + Mutex.unlock errors_mutex + done + ) + () + ) + in + Array.iter Thread.join add_threads ; + Array.iter Thread.join delete_threads ; + Array.iter Thread.join read_threads ; + Alcotest.(check int) "No errors during concurrent operations" 0 !errors + +let test_consume_during_delete_race () = + (* Test that try_consume doesn't crash when bucket is being deleted. + Without proper locking, we could try to access a deleted bucket. 
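+     try_consume holds the read lock across both the lookup and the token
+     consumption, so delete_bucket (which needs the write lock) cannot remove
+     the bucket mid-operation.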
*) + let iterations = 500 in + let errors = ref 0 in + let errors_mutex = Mutex.create () in + for _ = 1 to iterations do + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"target" ~burst_size:100.0 + ~fill_rate:1.0 + in + let barrier = ref 0 in + let barrier_mutex = Mutex.create () in + let consumer = + Thread.create + (fun () -> + Mutex.lock barrier_mutex ; + incr barrier ; + Mutex.unlock barrier_mutex ; + while + Mutex.lock barrier_mutex ; + let b = !barrier in + Mutex.unlock barrier_mutex ; b < 2 + do + Thread.yield () + done ; + try + let _ = Bucket_table.try_consume table ~user_agent:"target" 1.0 in + () + with _ -> + Mutex.lock errors_mutex ; incr errors ; Mutex.unlock errors_mutex + ) + () + in + let deleter = + Thread.create + (fun () -> + Mutex.lock barrier_mutex ; + incr barrier ; + Mutex.unlock barrier_mutex ; + while + Mutex.lock barrier_mutex ; + let b = !barrier in + Mutex.unlock barrier_mutex ; b < 2 + do + Thread.yield () + done ; + Bucket_table.delete_bucket table ~user_agent:"target" + ) + () + in + Thread.join consumer ; Thread.join deleter + done ; + Alcotest.(check int) "No crashes during consume/delete race" 0 !errors let test = [ @@ -168,7 +320,9 @@ let test = ; ("Multiple agents", `Quick, test_multiple_agents) ; ("Consume and block", `Slow, test_consume_and_block) ; ("Consume and block nonexistent", `Quick, test_consume_and_block_nonexistent) - ; ("Concurrent access", `Quick, test_concurrent_access) + ; ("Add same key race", `Quick, test_add_same_key_race) + ; ("Concurrent add/delete stress", `Quick, test_concurrent_add_delete_stress) + ; ("Consume during delete race", `Quick, test_consume_during_delete_race) ] let () = Alcotest.run "Bucket table library" [("Bucket table tests", test)] From 3e63f2ccfcef3e62726972f9925741fb3882ec23 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 4 Dec 2025 16:39:50 +0000 Subject: [PATCH 42/59] rate-limit: Handle rate limited requests in FIFO queue The current implementation of rate limiting had severe fairness issues. These have been resolved through the addition of a request queue, to which rate limited requests are added. A worker thread sleeps until its associated token bucket has enough tokens to handle the request at the head of the queue, calls it, and sleeps until the next request is ready. Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 149 ++++++++++++++---- ocaml/libs/rate-limit/bucket_table.mli | 13 +- .../libs/rate-limit/test/test_bucket_table.ml | 94 +++++++++-- .../libs/rate-limit/test/test_token_bucket.ml | 4 +- ocaml/libs/rate-limit/token_bucket.ml | 15 +- ocaml/libs/rate-limit/token_bucket.mli | 6 +- ocaml/xapi/server_helpers.ml | 28 ++-- 7 files changed, 245 insertions(+), 64 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index 2160d598235..e604847ec78 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -12,37 +12,72 @@ * GNU Lesser General Public License for more details. 
*) +type rate_limit_data = { + bucket: Token_bucket.t + ; process_queue: + (float * (unit -> unit)) Queue.t (* contains token cost and callback *) + ; process_queue_lock: Mutex.t + ; worker_thread_cond: Condition.t + ; should_terminate: bool ref (* signal termination to worker thread *) + ; worker_thread: Thread.t +} +[@@warning "-69"] + type t = { - table: (string, Token_bucket.t) Hashtbl.t + table: (string, rate_limit_data) Hashtbl.t ; mutable readers: int - ; reader_count: Mutex.t (* protects readers count *) - ; resource: Mutex.t (* held collectively by readers, exclusively by writers *) + ; readers_lock: Mutex.t (* protects readers count *) + ; table_lock: Mutex.t + (* held collectively by readers, exclusively by writers *) } let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute let with_read_lock t f = - with_lock t.reader_count (fun () -> + with_lock t.readers_lock (fun () -> t.readers <- t.readers + 1 ; - if t.readers = 1 then Mutex.lock t.resource + if t.readers = 1 then Mutex.lock t.table_lock ) ; Fun.protect f ~finally:(fun () -> - with_lock t.reader_count (fun () -> + with_lock t.readers_lock (fun () -> t.readers <- t.readers - 1 ; - if t.readers = 0 then Mutex.unlock t.resource + if t.readers = 0 then Mutex.unlock t.table_lock ) ) -let with_write_lock t f = with_lock t.resource f +let with_write_lock t f = with_lock t.table_lock f let create () = { table= Hashtbl.create 10 ; readers= 0 - ; reader_count= Mutex.create () - ; resource= Mutex.create () + ; readers_lock= Mutex.create () + ; table_lock= Mutex.create () } +(* The worker thread is responsible for calling the callback when the token + amount becomes available *) +let rec worker_loop ~bucket ~process_queue ~process_queue_lock + ~worker_thread_cond ~should_terminate = + let process_item cost callback = + Token_bucket.delay_then_consume bucket cost ; + callback () + in + Mutex.lock process_queue_lock ; + while Queue.is_empty process_queue && not !should_terminate do + Condition.wait worker_thread_cond process_queue_lock + done ; + let item_opt = Queue.take_opt process_queue in + Mutex.unlock process_queue_lock ; + match item_opt with + | None -> + (* Queue is empty only when termination was signalled *) + () + | Some (cost, callback) -> + process_item cost callback ; + worker_loop ~bucket ~process_queue ~process_queue_lock ~worker_thread_cond + ~should_terminate + (* TODO: Indicate failure reason - did we get invalid config or try to add an already present user_agent? 
*) let add_bucket t ~user_agent ~burst_size ~fill_rate = @@ -52,43 +87,95 @@ let add_bucket t ~user_agent ~burst_size ~fill_rate = else match Token_bucket.create ~burst_size ~fill_rate with | Some bucket -> - Hashtbl.add t.table user_agent bucket ; + let process_queue = Queue.create () in + let process_queue_lock = Mutex.create () in + let worker_thread_cond = Condition.create () in + let should_terminate = ref false in + let worker_thread = + Thread.create + (fun () -> + worker_loop ~bucket ~process_queue ~process_queue_lock + ~worker_thread_cond ~should_terminate + ) + () + in + let data = + { + bucket + ; process_queue + ; process_queue_lock + ; worker_thread_cond + ; should_terminate + ; worker_thread + } + in + Hashtbl.add t.table user_agent data ; true | None -> false ) let delete_bucket t ~user_agent = - with_write_lock t (fun () -> Hashtbl.remove t.table user_agent) + with_write_lock t (fun () -> + match Hashtbl.find_opt t.table user_agent with + | None -> + () + | Some data -> + Mutex.lock data.process_queue_lock ; + data.should_terminate := true ; + Condition.signal data.worker_thread_cond ; + Mutex.unlock data.process_queue_lock ; + Hashtbl.remove t.table user_agent + ) let try_consume t ~user_agent amount = with_read_lock t (fun () -> match Hashtbl.find_opt t.table user_agent with | None -> false - | Some bucket -> - Token_bucket.consume bucket amount + | Some data -> + Token_bucket.consume data.bucket amount ) let peek t ~user_agent = with_read_lock t (fun () -> - Option.map Token_bucket.peek (Hashtbl.find_opt t.table user_agent) + Option.map + (fun contents -> Token_bucket.peek contents.bucket) + (Hashtbl.find_opt t.table user_agent) ) -(* TODO this has fairness issues - fix with queue or similar *) -let consume_and_block t ~user_agent amount = - let bucket_opt = - with_read_lock t (fun () -> Hashtbl.find_opt t.table user_agent) - in - match bucket_opt with +(* The callback should return quickly - if it is a longer task it is + responsible for creating a thread to do the task *) +let submit t ~user_agent ~callback amount = + match with_read_lock t (fun () -> Hashtbl.find_opt t.table user_agent) with | None -> - () - | Some bucket -> - let rec try_consume () = - if Token_bucket.consume bucket amount then - () - else - let wait_time = Token_bucket.delay_until_available bucket amount in - Thread.delay wait_time ; try_consume () - in - try_consume () + callback () + | Some {bucket; process_queue; process_queue_lock; worker_thread_cond; _} -> + with_lock process_queue_lock (fun () -> + if Queue.is_empty process_queue && Token_bucket.consume bucket amount + then + callback () + else + let need_signal = Queue.is_empty process_queue in + Queue.add (amount, callback) process_queue ; + if need_signal then Condition.signal worker_thread_cond + ) + +let submit_sync t ~user_agent ~callback amount = + let result = ref None in + let mutex = Mutex.create () in + let condition = Condition.create () in + let wrapped_callback () = + let r = callback () in + Mutex.lock mutex ; + result := Some r ; + Condition.signal condition ; + Mutex.unlock mutex + in + submit t ~user_agent ~callback:wrapped_callback amount ; + Mutex.lock mutex ; + while Option.is_none !result do + Condition.wait condition mutex + done ; + Mutex.unlock mutex ; + Option.get !result diff --git a/ocaml/libs/rate-limit/bucket_table.mli b/ocaml/libs/rate-limit/bucket_table.mli index 8b3bdc26f4f..7727b55940c 100644 --- a/ocaml/libs/rate-limit/bucket_table.mli +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -35,6 +35,13 @@ val 
try_consume : t -> user_agent:string -> float -> bool (** [try_consume table ~user_agent amount] attempts to consume tokens. Returns [true] on success, [false] if insufficient tokens. *) -val consume_and_block : t -> user_agent:string -> float -> unit -(** [consume_and_block table ~user_agent amount] consumes tokens, blocking - until sufficient tokens are available. *) +val submit : t -> user_agent:string -> callback:(unit -> unit) -> float -> unit +(** [submit table ~user_agent ~callback amount] submits a callback to be executed + under rate limiting. If tokens are immediately available and no callbacks are + queued, the callback runs synchronously. Otherwise, it is enqueued and will + be executed by a worker thread when tokens become available. Returns immediately. *) + +val submit_sync : t -> user_agent:string -> callback:(unit -> 'a) -> float -> 'a +(** [submit_sync table ~user_agent ~callback amount] submits a callback to be + executed under rate limiting and blocks until it completes, returning the + callback's result. *) diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml index 70921ae0435..03f8dd6aad9 100644 --- a/ocaml/libs/rate-limit/test/test_bucket_table.ml +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -106,25 +106,97 @@ let test_multiple_agents () = "Agent2 tokens unchanged" (Some 20.0) (Bucket_table.peek table ~user_agent:"agent2") -let test_consume_and_block () = +let test_submit () = let table = Bucket_table.create () in let _ = Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 ~fill_rate:10.0 in let _ = Bucket_table.try_consume table ~user_agent:"agent1" 10.0 in + let executed = ref false in let start_counter = Mtime_clock.counter () in - Bucket_table.consume_and_block table ~user_agent:"agent1" 5.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> executed := true) + 5.0 ; let elapsed_span = Mtime_clock.count start_counter in let elapsed_seconds = Mtime.Span.to_float_ns elapsed_span *. 
1e-9 in - Alcotest.(check (float 0.1)) - "consume_and_block should wait for tokens" elapsed_seconds 0.5 + (* submit should return immediately (non-blocking) *) + Alcotest.(check bool) "submit returns immediately" true (elapsed_seconds < 0.1) ; + (* Wait for callback to be executed by worker *) + Thread.delay 0.6 ; + Alcotest.(check bool) "callback eventually executed" true !executed -let test_consume_and_block_nonexistent () = +let test_submit_nonexistent () = let table = Bucket_table.create () in - Bucket_table.consume_and_block table ~user_agent:"nonexistent" 1.0 ; - Alcotest.(check pass) - "consume_and_block on nonexistent bucket should not block" () () + let executed = ref false in + Bucket_table.submit table ~user_agent:"nonexistent" + ~callback:(fun () -> executed := true) + 1.0 ; + Alcotest.(check bool) + "submit on nonexistent bucket runs callback immediately" true !executed + +let test_submit_fairness () = + (* Test that callbacks are executed in FIFO order regardless of token cost *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:5.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + let execution_order = ref [] in + let order_mutex = Mutex.create () in + let record_execution id = + Mutex.lock order_mutex ; + execution_order := id :: !execution_order ; + Mutex.unlock order_mutex + in + (* Submit callbacks with varying costs - order should be preserved *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 1) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 2) + 3.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 3) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 4) + 2.0 ; + (* Wait for all callbacks to complete (total cost = 7 tokens, rate = 5/s) *) + Thread.delay 2.0 ; + let order = List.rev !execution_order in + Alcotest.(check (list int)) + "callbacks execute in FIFO order" [1; 2; 3; 4] order + +let test_submit_sync () = + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:10.0 + ~fill_rate:10.0 + in + (* Test 1: Returns callback result immediately when tokens available *) + let result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> 42) + 5.0 + in + Alcotest.(check int) "returns callback result" 42 result ; + (* Test 2: Blocks and waits for tokens, then returns result *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + (* drain bucket *) + let start_counter = Mtime_clock.counter () in + let result2 = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> "hello") + 5.0 + in + let elapsed_span = Mtime_clock.count start_counter in + let elapsed_seconds = Mtime.Span.to_float_ns elapsed_span *. 1e-9 in + Alcotest.(check string) "returns string result" "hello" result2 ; + Alcotest.(check bool) + "blocked waiting for tokens" true (elapsed_seconds >= 0.4) let test_add_same_key_race () = (* Test the check-then-act race in add_bucket. 
@@ -318,8 +390,10 @@ let test = ; ("Try consume nonexistent", `Quick, test_try_consume_nonexistent) ; ("Peek nonexistent", `Quick, test_peek_nonexistent) ; ("Multiple agents", `Quick, test_multiple_agents) - ; ("Consume and block", `Slow, test_consume_and_block) - ; ("Consume and block nonexistent", `Quick, test_consume_and_block_nonexistent) + ; ("Submit", `Slow, test_submit) + ; ("Submit nonexistent", `Quick, test_submit_nonexistent) + ; ("Submit fairness", `Slow, test_submit_fairness) + ; ("Submit sync", `Slow, test_submit_sync) ; ("Add same key race", `Quick, test_add_same_key_race) ; ("Concurrent add/delete stress", `Quick, test_concurrent_add_delete_stress) ; ("Consume during delete race", `Quick, test_consume_during_delete_race) diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml index 993be30e23f..80bb1b66681 100644 --- a/ocaml/libs/rate-limit/test/test_token_bucket.ml +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -218,7 +218,7 @@ let test_delay_until_available () = in let delay = - Token_bucket.delay_until_available_timestamp initial_time tb 4.0 + Token_bucket.get_delay_until_available_timestamp initial_time tb 4.0 in Alcotest.(check (float 0.01)) "Delay for 4 tokens at 2 tokens/sec should be 2 seconds" 2.0 delay ; @@ -227,7 +227,7 @@ let test_delay_until_available () = Option.get (Token_bucket.create ~burst_size:10.0 ~fill_rate:2.0) in let _ = Token_bucket.consume tb_fresh 10.0 in - let delay_system = Token_bucket.delay_until_available tb_fresh 4.0 in + let delay_system = Token_bucket.get_delay_until_available tb_fresh 4.0 in Alcotest.(check (float 0.1)) "System time delay should be approximately 2 seconds" 2.0 delay_system diff --git a/ocaml/libs/rate-limit/token_bucket.ml b/ocaml/libs/rate-limit/token_bucket.ml index 81e2cfff93e..691752572c6 100644 --- a/ocaml/libs/rate-limit/token_bucket.ml +++ b/ocaml/libs/rate-limit/token_bucket.ml @@ -61,10 +61,19 @@ let consume_with_timestamp get_time tb amount = let consume = consume_with_timestamp Mtime_clock.elapsed -let delay_until_available_timestamp timestamp tb amount = +let get_delay_until_available_timestamp timestamp tb amount = let current_tokens = peek_with_timestamp timestamp tb in let required_tokens = max 0. (amount -. current_tokens) in required_tokens /. tb.fill_rate -let delay_until_available tb amount = - delay_until_available_timestamp (Mtime_clock.elapsed ()) tb amount +let get_delay_until_available tb amount = + get_delay_until_available_timestamp (Mtime_clock.elapsed ()) tb amount + +(* This implementation only works when there is only one thread trying to + consume - fairness needs to be implemented on top of it with a queue. + If there is no contention, it should only delay once. 
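+   For example, with fill_rate = 2.0 and an empty bucket, a request for 4.0
+   tokens computes a 2.0 s delay via get_delay_until_available, sleeps once,
+   and the retried consume succeeds (assuming burst_size >= 4.0 and no other
+   consumer).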
*) +let rec delay_then_consume tb amount = + if not (consume tb amount) then ( + Thread.delay (get_delay_until_available tb amount) ; + delay_then_consume tb amount + ) diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli index 58bbe8f0138..2b7a9cce8af 100644 --- a/ocaml/libs/rate-limit/token_bucket.mli +++ b/ocaml/libs/rate-limit/token_bucket.mli @@ -53,7 +53,7 @@ val consume : t -> float -> bool @return Whether the tokens were successfully consumed *) -val delay_until_available : t -> float -> float +val get_delay_until_available : t -> float -> float (** Get number of seconds that need to pass until bucket is expected to have enough tokens to fulfil the request @param tb Token bucket @@ -61,6 +61,8 @@ val delay_until_available : t -> float -> float @return Number of seconds until tokens are available *) +val delay_then_consume : t -> float -> unit + (**/**) (* Fuctions accepting a timestamp are meant for testing only *) @@ -90,7 +92,7 @@ val consume_with_timestamp : (unit -> Mtime.span) -> t -> float -> bool @return Whether the tokens were successfully consumed *) -val delay_until_available_timestamp : Mtime.span -> t -> float -> float +val get_delay_until_available_timestamp : Mtime.span -> t -> float -> float (** Get number of seconds that need to pass until bucket is expected to have enough tokens to fulfil the request @param timestamp diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 5ac0127e209..3601f58cbea 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -189,26 +189,28 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name Rate_limit.Bucket_table.peek Xapi_rate_limit.bucket_table ~user_agent ) in - ( match user_agent_option with + let callback () = + match sync_ty with + | `Sync -> + sync () + | `Async -> + let need_complete = not (Context.forwarded_task __context) in + async ~need_complete + | `InternalAsync -> + async ~need_complete:true + in + match user_agent_option with | Some user_agent -> D.debug "Bucket table: Expecting to consume %f tokens from user_agent %s" (Option.value peek_result ~default:0.) user_agent ; - Rate_limit.Bucket_table.consume_and_block Xapi_rate_limit.bucket_table + Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table ~user_agent:(Option.value http_req.user_agent ~default:"") - 1. + ~callback 1. 
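+      (* For a matched user agent each call costs one token, and submit_sync
+         blocks this dispatch thread until the callback has run, in FIFO order
+         behind any already queued requests. *)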
| None -> - D.debug "Bucket table: user_agent was None, not throttling" - ) ; - match sync_ty with - | `Sync -> - sync () - | `Async -> - let need_complete = not (Context.forwarded_task __context) in - async ~need_complete - | `InternalAsync -> - async ~need_complete:true + D.debug "Bucket table: user_agent was None, not throttling" ; + callback () (* regardless of forwarding, we are expected to complete the task *) From b6b01731603a85426a53f0c82436de785a7e7c00 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 5 Dec 2025 11:11:56 +0000 Subject: [PATCH 43/59] rate-limit: Replace readers-writer lock with atomic Map Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 141 +++++++----------- .../libs/rate-limit/test/test_bucket_table.ml | 59 -------- 2 files changed, 57 insertions(+), 143 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index e604847ec78..1013c2b382e 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -23,37 +23,13 @@ type rate_limit_data = { } [@@warning "-69"] -type t = { - table: (string, rate_limit_data) Hashtbl.t - ; mutable readers: int - ; readers_lock: Mutex.t (* protects readers count *) - ; table_lock: Mutex.t - (* held collectively by readers, exclusively by writers *) -} - -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +module StringMap = Map.Make (String) -let with_read_lock t f = - with_lock t.readers_lock (fun () -> - t.readers <- t.readers + 1 ; - if t.readers = 1 then Mutex.lock t.table_lock - ) ; - Fun.protect f ~finally:(fun () -> - with_lock t.readers_lock (fun () -> - t.readers <- t.readers - 1 ; - if t.readers = 0 then Mutex.unlock t.table_lock - ) - ) +type t = rate_limit_data StringMap.t Atomic.t -let with_write_lock t f = with_lock t.table_lock f +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -let create () = - { - table= Hashtbl.create 10 - ; readers= 0 - ; readers_lock= Mutex.create () - ; table_lock= Mutex.create () - } +let create () = Atomic.make StringMap.empty (* The worker thread is responsible for calling the callback when the token amount becomes available *) @@ -81,73 +57,70 @@ let rec worker_loop ~bucket ~process_queue ~process_queue_lock (* TODO: Indicate failure reason - did we get invalid config or try to add an already present user_agent? 
*) let add_bucket t ~user_agent ~burst_size ~fill_rate = - with_write_lock t (fun () -> - if Hashtbl.mem t.table user_agent then + let map = Atomic.get t in + if StringMap.mem user_agent map then + false + else + match Token_bucket.create ~burst_size ~fill_rate with + | Some bucket -> + let process_queue = Queue.create () in + let process_queue_lock = Mutex.create () in + let worker_thread_cond = Condition.create () in + let should_terminate = ref false in + let worker_thread = + Thread.create + (fun () -> + worker_loop ~bucket ~process_queue ~process_queue_lock + ~worker_thread_cond ~should_terminate + ) + () + in + let data = + { + bucket + ; process_queue + ; process_queue_lock + ; worker_thread_cond + ; should_terminate + ; worker_thread + } + in + let updated_map = StringMap.add user_agent data map in + Atomic.set t updated_map ; true + | None -> false - else - match Token_bucket.create ~burst_size ~fill_rate with - | Some bucket -> - let process_queue = Queue.create () in - let process_queue_lock = Mutex.create () in - let worker_thread_cond = Condition.create () in - let should_terminate = ref false in - let worker_thread = - Thread.create - (fun () -> - worker_loop ~bucket ~process_queue ~process_queue_lock - ~worker_thread_cond ~should_terminate - ) - () - in - let data = - { - bucket - ; process_queue - ; process_queue_lock - ; worker_thread_cond - ; should_terminate - ; worker_thread - } - in - Hashtbl.add t.table user_agent data ; - true - | None -> - false - ) let delete_bucket t ~user_agent = - with_write_lock t (fun () -> - match Hashtbl.find_opt t.table user_agent with - | None -> - () - | Some data -> - Mutex.lock data.process_queue_lock ; - data.should_terminate := true ; - Condition.signal data.worker_thread_cond ; - Mutex.unlock data.process_queue_lock ; - Hashtbl.remove t.table user_agent - ) + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + () + | Some data -> + Mutex.lock data.process_queue_lock ; + data.should_terminate := true ; + Condition.signal data.worker_thread_cond ; + Mutex.unlock data.process_queue_lock ; + Atomic.set t (StringMap.remove user_agent map) let try_consume t ~user_agent amount = - with_read_lock t (fun () -> - match Hashtbl.find_opt t.table user_agent with - | None -> - false - | Some data -> - Token_bucket.consume data.bucket amount - ) + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + false + | Some data -> + Token_bucket.consume data.bucket amount let peek t ~user_agent = - with_read_lock t (fun () -> - Option.map - (fun contents -> Token_bucket.peek contents.bucket) - (Hashtbl.find_opt t.table user_agent) - ) + let map = Atomic.get t in + Option.map + (fun contents -> Token_bucket.peek contents.bucket) + (StringMap.find_opt user_agent map) (* The callback should return quickly - if it is a longer task it is responsible for creating a thread to do the task *) let submit t ~user_agent ~callback amount = - match with_read_lock t (fun () -> Hashtbl.find_opt t.table user_agent) with + let map = Atomic.get t in + match StringMap.find_opt user_agent map with | None -> callback () | Some {bucket; process_queue; process_queue_lock; worker_thread_cond; _} -> diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml index 03f8dd6aad9..17da5d3cd5c 100644 --- a/ocaml/libs/rate-limit/test/test_bucket_table.ml +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -198,64 +198,6 @@ let test_submit_sync () = Alcotest.(check 
bool) "blocked waiting for tokens" true (elapsed_seconds >= 0.4) -let test_add_same_key_race () = - (* Test the check-then-act race in add_bucket. - add_bucket does: if not mem then add. Without locking, multiple threads - could all pass the mem check and try to add, but only one should succeed. - Note: OCaml 4's GIL makes races hard to trigger, but this test verifies - the invariant holds under concurrent access and would catch races if the - GIL is released at allocation points within the critical section. *) - let iterations = 500 in - let threads_per_iter = 10 in - let failures = ref 0 in - let failures_mutex = Mutex.create () in - for _ = 1 to iterations do - let table = Bucket_table.create () in - let success_count = ref 0 in - let count_mutex = Mutex.create () in - let barrier = ref 0 in - let barrier_mutex = Mutex.create () in - let threads = - Array.init threads_per_iter (fun _ -> - Thread.create - (fun () -> - (* Increment barrier and wait for all threads *) - Mutex.lock barrier_mutex ; - incr barrier ; - Mutex.unlock barrier_mutex ; - while - Mutex.lock barrier_mutex ; - let b = !barrier in - Mutex.unlock barrier_mutex ; b < threads_per_iter - do - Thread.yield () - done ; - (* All threads try to add the same key simultaneously *) - let success = - Bucket_table.add_bucket table ~user_agent:"contested_key" - ~burst_size:10.0 ~fill_rate:1.0 - in - if success then ( - Mutex.lock count_mutex ; - incr success_count ; - Mutex.unlock count_mutex - ) - ) - () - ) - in - Array.iter Thread.join threads ; - (* Exactly one thread should succeed in adding the key *) - if !success_count <> 1 then ( - Mutex.lock failures_mutex ; - incr failures ; - Mutex.unlock failures_mutex - ) - done ; - Alcotest.(check int) - "Exactly one add should succeed for same key (across all iterations)" 0 - !failures - let test_concurrent_add_delete_stress () = (* Stress test: rapidly add and delete entries. Without proper locking, hashtable can get corrupted. *) @@ -394,7 +336,6 @@ let test = ; ("Submit nonexistent", `Quick, test_submit_nonexistent) ; ("Submit fairness", `Slow, test_submit_fairness) ; ("Submit sync", `Slow, test_submit_sync) - ; ("Add same key race", `Quick, test_add_same_key_race) ; ("Concurrent add/delete stress", `Quick, test_concurrent_add_delete_stress) ; ("Consume during delete race", `Quick, test_consume_during_delete_race) ] From 369f0d2547e59199c4e8fbd4a749fc66f9f18ec5 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 5 Dec 2025 11:22:34 +0000 Subject: [PATCH 44/59] rate-limit: Clarify token bucket creation docs Creating a token bucket fails if the rate limit supplied is 0 or negative - this can lead to unexpected and undesirable behaviour, such as division by 0 or negative token counts. Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/token_bucket.mli | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/rate-limit/token_bucket.mli b/ocaml/libs/rate-limit/token_bucket.mli index 2b7a9cce8af..d04f4fd6174 100644 --- a/ocaml/libs/rate-limit/token_bucket.mli +++ b/ocaml/libs/rate-limit/token_bucket.mli @@ -36,6 +36,7 @@ type t val create : burst_size:float -> fill_rate:float -> t option (** Create token bucket with given parameters. + Returns None if the fill rate is 0 or negative. 
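+    For example, [create ~burst_size:10.0 ~fill_rate:0.0] (or a negative rate)
+    yields [None], while [create ~burst_size:10.0 ~fill_rate:2.0] yields
+    [Some bucket].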
@param burst_size Maximum number of tokens that can fit in the bucket @param fill_rate Number of tokens added to the bucket per second *) @@ -70,6 +71,7 @@ val delay_then_consume : t -> float -> unit val create_with_timestamp : Mtime.span -> burst_size:float -> fill_rate:float -> t option (** Create token bucket with given parameters and supplied inital timestamp + Returns None if the fill_rate is 0 or negative. @param timestamp Initial timestamp @param burst_size Maximum number of tokens that can fit in the bucket @param fill_rate Number of tokens added to the bucket per second @@ -77,7 +79,7 @@ val create_with_timestamp : val peek_with_timestamp : Mtime.span -> t -> float (** Retrieve token amount in token bucket at given timestamp. - Undefined behaviour when [timestamp] <= [tb.timestamp] + Undefined behaviour when [timestamp] <= [tb.timestamp] @param timestamp Current time @param tb Token bucket @return Amount of tokens in the token bucket From 028b505a22f0acc727ce4d3fd06b4710120afc64 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 2 Dec 2025 14:47:31 +0000 Subject: [PATCH 45/59] idl: Add Rate_limit datamodel Signed-off-by: Christian Pardillo Laursen --- ocaml/idl/datamodel.ml | 2 + ocaml/idl/datamodel_common.ml | 2 + ocaml/idl/datamodel_lifecycle.ml | 10 + ocaml/idl/datamodel_rate_limit.ml | 40 +++ ocaml/idl/dune | 2 +- ocaml/idl/schematest.ml | 2 +- ocaml/libs/rate-limit/bucket_table.ml | 4 + ocaml/libs/rate-limit/bucket_table.mli | 6 +- .../libs/rate-limit/test/test_token_bucket.ml | 2 +- ocaml/libs/uuid/uuidx.ml | 1 + ocaml/libs/uuid/uuidx.mli | 1 + ocaml/xapi/api_server_common.ml | 1 + ocaml/xapi/message_forwarding.ml | 2 + ocaml/xapi/xapi.ml | 8 +- ocaml/xapi/xapi_rate_limit.ml | 45 +++- ocaml/xapi/xapi_rate_limit.mli | 11 +- ocaml/xapi/xapi_session.mli | 2 - quality-gate.sh | 238 +++++++++--------- 18 files changed, 245 insertions(+), 134 deletions(-) create mode 100644 ocaml/idl/datamodel_rate_limit.ml diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index bfe326e4356..d39a970a49e 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -10535,6 +10535,7 @@ let all_system = ; Datamodel_vm_group.t ; Datamodel_host_driver.t ; Datamodel_driver_variant.t + ; Datamodel_rate_limit.t ] (* If the relation is one-to-many, the "many" nodes (one edge each) must come before the "one" node (many edges) *) @@ -10786,6 +10787,7 @@ let expose_get_all_messages_for = ; _observer ; _host_driver ; _driver_variant + ; _rate_limit ] let no_task_id_for = [_task; (* _alert; *) _event] diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 12c548580b1..efe0cb5f2cd 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -315,6 +315,8 @@ let _host_driver = "Host_driver" let _driver_variant = "Driver_variant" +let _rate_limit = "Rate_limit" + let update_guidances = Enum ( "update_guidances" diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index a98e52d1dd0..9d5f2820121 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -1,4 +1,6 @@ let prototyped_of_class = function + | "Rate_limit" -> + Some "25.38.0-next" | "Driver_variant" -> Some "25.2.0" | "Host_driver" -> @@ -13,6 +15,14 @@ let prototyped_of_class = function None let prototyped_of_field = function + | "Rate_limit", "fill_rate" -> + Some "25.38.0-next" + | "Rate_limit", "burst_size" -> + Some "25.38.0-next" + | "Rate_limit", "client_id" -> + Some "25.38.0-next" + | "Rate_limit", "uuid" -> 
+ Some "25.38.0-next" | "Driver_variant", "status" -> Some "25.2.0" | "Driver_variant", "priority" -> diff --git a/ocaml/idl/datamodel_rate_limit.ml b/ocaml/idl/datamodel_rate_limit.ml new file mode 100644 index 00000000000..f870ba73a1a --- /dev/null +++ b/ocaml/idl/datamodel_rate_limit.ml @@ -0,0 +1,40 @@ +(* + * Copyright (C) 2023 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Datamodel_types +open Datamodel_common +open Datamodel_roles + +let lifecycle = [] + +let t = + create_obj ~name:_rate_limit ~descr:"Rate limiting policy for a XAPI client" + ~doccomments:[] ~gen_constructor_destructor:true ~gen_events:true + ~in_db:true ~lifecycle:[] ~persist:PersistEverything ~in_oss_since:None + ~messages_default_allowed_roles:_R_POOL_ADMIN + ~contents: + ([uid _rate_limit ~lifecycle] + @ [ + field ~qualifier:StaticRO ~ty:String ~lifecycle "client_id" + "An identifier for the rate limited client" ~ignore_foreign_key:true + ~default_value:(Some (VString "")) + ; field ~qualifier:StaticRO ~ty:Float ~lifecycle "burst_size" + "Amount of tokens that can be consumed in one burst" + ~ignore_foreign_key:true ~default_value:(Some (VFloat 0.)) + ; field ~qualifier:StaticRO ~ty:Float ~lifecycle "fill_rate" + "Tokens added to token bucket per second" ~ignore_foreign_key:true + ~default_value:(Some (VFloat 0.)) + ] + ) + ~messages:[] () diff --git a/ocaml/idl/dune b/ocaml/idl/dune index ac591ae1e0f..eb55c786d40 100644 --- a/ocaml/idl/dune +++ b/ocaml/idl/dune @@ -7,7 +7,7 @@ datamodel_values datamodel_schema datamodel_certificate datamodel_diagnostics datamodel_repository datamodel_lifecycle datamodel_vtpm datamodel_observer datamodel_vm_group api_version - datamodel_host_driver datamodel_driver_variant) + datamodel_host_driver datamodel_driver_variant datamodel_rate_limit) (libraries rpclib.core sexplib0 diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 9411d1c3b42..2b70bf725b6 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "3b20f4304cfaaa7b6213af91ae632e64" +let last_known_schema_hash = "4708cb1f0cf7c1231c6958590ee1ed04" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index 1013c2b382e..a35bcb6ac2e 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -31,6 +31,10 @@ let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute let create () = Atomic.make StringMap.empty +let mem t ~user_agent = + let map = Atomic.get t in + StringMap.mem user_agent map + (* The worker thread is responsible for calling the callback when the token amount becomes available *) let rec worker_loop ~bucket ~process_queue ~process_queue_lock diff --git a/ocaml/libs/rate-limit/bucket_table.mli 
b/ocaml/libs/rate-limit/bucket_table.mli index 7727b55940c..87bb5f49bd9 100644 --- a/ocaml/libs/rate-limit/bucket_table.mli +++ b/ocaml/libs/rate-limit/bucket_table.mli @@ -22,7 +22,11 @@ val add_bucket : t -> user_agent:string -> burst_size:float -> fill_rate:float -> bool (** [add_bucket table ~user_agent ~burst_size ~fill_rate] adds a token bucket for the given user agent. Returns [false] if a bucket already exists, or if - the bucket configuration is invalid, e.g. negative fill rate. *) + the bucket configuration is invalid, e.g. negative/zero fill rate. *) + +val mem : t -> user_agent:string -> bool +(** [mem table ~user_agent] returns whether [user_agent] has an associated + token bucket in the bucket table *) val peek : t -> user_agent:string -> float option (** [peek table ~user_agent] returns the current token count for the user agent, diff --git a/ocaml/libs/rate-limit/test/test_token_bucket.ml b/ocaml/libs/rate-limit/test/test_token_bucket.ml index 80bb1b66681..2cd3a7992ef 100644 --- a/ocaml/libs/rate-limit/test/test_token_bucket.ml +++ b/ocaml/libs/rate-limit/test/test_token_bucket.ml @@ -122,7 +122,7 @@ let test_sleep () = let tb = Option.get (Token_bucket.create ~burst_size:20.0 ~fill_rate:5.0) in let _ = Token_bucket.consume tb 10.0 in Thread.delay 1.0 ; - Alcotest.(check (float 0.2)) + Alcotest.(check (float 0.5)) "Sleep 1 should refill token bucket by fill_rate" 15.0 (Token_bucket.peek tb) let test_system_time_versions () = diff --git a/ocaml/libs/uuid/uuidx.ml b/ocaml/libs/uuid/uuidx.ml index b22c22ebd14..8ae23a84052 100644 --- a/ocaml/libs/uuid/uuidx.ml +++ b/ocaml/libs/uuid/uuidx.ml @@ -64,6 +64,7 @@ type without_secret = | `sr_stat | `subject | `task + | `Rate_limit | `tunnel | `USB_group | `user diff --git a/ocaml/libs/uuid/uuidx.mli b/ocaml/libs/uuid/uuidx.mli index bd0865cf628..e3346480998 100644 --- a/ocaml/libs/uuid/uuidx.mli +++ b/ocaml/libs/uuid/uuidx.mli @@ -75,6 +75,7 @@ type without_secret = | `sr_stat | `subject | `task + | `Rate_limit | `tunnel | `USB_group | `user diff --git a/ocaml/xapi/api_server_common.ml b/ocaml/xapi/api_server_common.ml index f4167c1f36a..043a3bc96c8 100644 --- a/ocaml/xapi/api_server_common.ml +++ b/ocaml/xapi/api_server_common.ml @@ -132,6 +132,7 @@ module Actions = struct module Observer = Xapi_observer module Host_driver = Xapi_host_driver module Driver_variant = Xapi_host_driver.Variant + module Rate_limit = Xapi_rate_limit end (** Use the server functor to make an XML-RPC dispatcher. 
*) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 060195e120a..f97f6e96930 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -6707,6 +6707,8 @@ functor in Xapi_pool_helpers.call_fn_on_slaves_then_master ~__context fn end + + module Rate_limit = Xapi_rate_limit end (* for unit tests *) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index c45ba121528..a60d73fc883 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -884,11 +884,9 @@ let listen_unix_socket sock_path = Unixext.mkdir_safe (Filename.dirname sock_path) 0o700 ; Unixext.unlink_safe sock_path ; let domain_sock = Xapi_http.bind (Unix.ADDR_UNIX sock_path) in - ignore - (Http_svr.start - ~conn_limit:!Xapi_globs.conn_limit_unix - Xapi_http.server domain_sock - ) + Http_svr.start + ~conn_limit:!Xapi_globs.conn_limit_unix + Xapi_http.server domain_sock let set_stunnel_timeout () = try diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index 63addecf153..1bc7b34a345 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2025 Citrix Systems Inc. + * Copyright (C) Citrix Systems Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -13,7 +13,48 @@ *) module D = Debug.Make (struct let name = "xapi_rate_limit" end) -let bucket_table = Rate_limit.Bucket_table.create () +open Rate_limit + +let bucket_table = Bucket_table.create () + +let create ~__context ~client_id ~burst_size ~fill_rate = + if Bucket_table.mem bucket_table ~user_agent:client_id then + raise + Api_errors.( + Server_error + ( map_duplicate_key + , ["client_id"; client_id; "client_id already registered"] + ) + ) ; + let uuid = Uuidx.make () in + let ref = Ref.make () in + let add_bucket_succeeded = + Bucket_table.add_bucket bucket_table ~user_agent:client_id ~burst_size + ~fill_rate + in + match add_bucket_succeeded with + | true -> + Db.Rate_limit.create ~__context ~ref ~uuid:(Uuidx.to_string uuid) + ~client_id ~burst_size ~fill_rate ; + ref + | false -> + raise + Api_errors.( + Server_error + ( invalid_value + , [ + "fill_rate" + ; string_of_float fill_rate + ; "Fill rate cannot be 0 or negative" + ] + ) + ) + +let destroy ~__context ~self = + let record = Db.Rate_limit.get_record ~__context ~self in + Bucket_table.delete_bucket bucket_table + ~user_agent:record.rate_limit_client_id ; + Db.Rate_limit.destroy ~__context ~self let register_xapi_globs () = let configs = !Xapi_globs.rate_limited_clients in diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli index f9b48bd83bb..69a4bed2b86 100644 --- a/ocaml/xapi/xapi_rate_limit.mli +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -1,5 +1,5 @@ (* - * Copyright (C) 2025 Citrix Systems Inc. + * Copyright (C) Citrix Systems Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -14,4 +14,13 @@ val bucket_table : Rate_limit.Bucket_table.t +val create : + __context:Context.t + -> client_id:string + -> burst_size:float + -> fill_rate:float + -> [`Rate_limit] Ref.t + +val destroy : __context:Context.t -> self:[`Rate_limit] API.Ref.t -> unit + val register_xapi_globs : unit -> unit diff --git a/ocaml/xapi/xapi_session.mli b/ocaml/xapi/xapi_session.mli index 10baf03abc2..25ffa3fa366 100644 --- a/ocaml/xapi/xapi_session.mli +++ b/ocaml/xapi/xapi_session.mli @@ -15,8 +15,6 @@ * @group XenAPI functions *) -(** {2 (Fill in Title!)} *) - (* TODO: consider updating sm_exec.ml and removing login_no_password from this mli *) val login_no_password : __context:Context.t diff --git a/quality-gate.sh b/quality-gate.sh index c7965c34f0e..b6c3f7df759 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -2,146 +2,145 @@ set -e -list-hd () { - N=253 - LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$LIST_HD" -eq "$N" ]; then - echo "OK counted $LIST_HD List.hd usages" - else - echo "ERROR expected $N List.hd usages, got $LIST_HD" 1>&2 - exit 1 - fi +list-hd() { + N=253 + LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$LIST_HD" -eq "$N" ]; then + echo "OK counted $LIST_HD List.hd usages" + else + echo "ERROR expected $N List.hd usages, got $LIST_HD" 1>&2 + exit 1 + fi } -verify-cert () { - N=13 - NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$NONE" -eq "$N" ]; then - echo "OK counted $NONE usages of verify_cert:None" - else - echo "ERROR expected $N verify_cert:None usages, got $NONE" 1>&2 - exit 1 - fi +verify-cert() { + N=13 + NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$NONE" -eq "$N" ]; then + echo "OK counted $NONE usages of verify_cert:None" + else + echo "ERROR expected $N verify_cert:None usages, got $NONE" 1>&2 + exit 1 + fi } -mli-files () { - N=459 - X="ocaml/tests" - X+="|ocaml/quicktest" - X+="|ocaml/message-switch/core_test" - # do not count ml files from the tests in ocaml/{tests/quicktest} - M=$(comm -23 <(git ls-files -- '**/*.ml' | sed 's/.ml$//' | sort) \ - <(git ls-files -- '**/*.mli' | sed 's/.mli$//' | sort) |\ +mli-files() { + N=460 + X="ocaml/tests" + X+="|ocaml/quicktest" + X+="|ocaml/message-switch/core_test" + # do not count ml files from the tests in ocaml/{tests/quicktest} + M=$(comm -23 <(git ls-files -- '**/*.ml' | sed 's/.ml$//' | sort) \ + <(git ls-files -- '**/*.mli' | sed 's/.mli$//' | sort) | grep -cvE "$X") - if [ "$M" -eq "$N" ]; then - echo "OK counted $M .ml files without an .mli" - else - echo "ERROR expected $N .ml files without .mlis, got $M."\ - "If you created some .ml files, they are probably missing corresponding .mli's" 1>&2 - exit 1 - fi + if [ "$M" -eq "$N" ]; then + echo "OK counted $M .ml files without an .mli" + else + echo "ERROR expected $N .ml files without .mlis, got $M." 
\ + "If you created some .ml files, they are probably missing corresponding .mli's" 1>&2 + exit 1 + fi } -structural-equality () { - N=7 - EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$EQ" -eq "$N" ]; then - echo "OK counted $EQ usages of ' == '" - else - echo "ERROR expected $N usages of ' == ', got $EQ; use = rather than ==" 1>&2 - exit 1 - fi - - if git grep -r --count ' != ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml'; then - echo "ERROR expected no usages of ' != '; use <> rather than !=" 1>&2 - exit 1 - else - echo "OK found no usages of ' != '" - fi +structural-equality() { + N=7 + EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$EQ" -eq "$N" ]; then + echo "OK counted $EQ usages of ' == '" + else + echo "ERROR expected $N usages of ' == ', got $EQ; use = rather than ==" 1>&2 + exit 1 + fi + + if git grep -r --count ' != ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml'; then + echo "ERROR expected no usages of ' != '; use <> rather than !=" 1>&2 + exit 1 + else + echo "OK found no usages of ' != '" + fi } -vtpm-unimplemented () { - N=2 - VTPM=$(git grep -r --count 'maybe_raise_vtpm_unimplemented' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$VTPM" -eq "$N" ]; then - echo "OK found $VTPM usages of vtpm unimplemented errors" - else - echo "ERROR expected $N usages of unimplemented vtpm functionality, got $VTPM." 1>&2 - exit 1 - fi +vtpm-unimplemented() { + N=2 + VTPM=$(git grep -r --count 'maybe_raise_vtpm_unimplemented' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$VTPM" -eq "$N" ]; then + echo "OK found $VTPM usages of vtpm unimplemented errors" + else + echo "ERROR expected $N usages of unimplemented vtpm functionality, got $VTPM." 1>&2 + exit 1 + fi } -vtpm-fields () { - A=$(git grep -hc "vTPM'_.*:" ocaml/xapi/importexport.ml) - B=$(git grep -hc ' field' ocaml/idl/datamodel_vtpm.ml) - case "$A/$B" in - 5/6) - echo "OK found $A/$B VTPM fields in importexport.ml datamodel_vtpm.ml" - ;; - *) - echo "ERROR have VTPM fields changed? $A/$B - check importexport.ml" 1>&2 - exit 1 - ;; - esac +vtpm-fields() { + A=$(git grep -hc "vTPM'_.*:" ocaml/xapi/importexport.ml) + B=$(git grep -hc ' field' ocaml/idl/datamodel_vtpm.ml) + case "$A/$B" in + 5/6) + echo "OK found $A/$B VTPM fields in importexport.ml datamodel_vtpm.ml" + ;; + *) + echo "ERROR have VTPM fields changed? $A/$B - check importexport.ml" 1>&2 + exit 1 + ;; + esac } -ocamlyacc () { - N=0 - OCAMLYACC=$(git grep -r -o --count "ocamlyacc" '**/dune' | wc -l) - if [ "$OCAMLYACC" -eq "$N" ]; then - echo "OK found $OCAMLYACC usages of ocamlyacc usages in dune files." - else - echo "ERROR expected $N usages of ocamlyacc in dune files, got $OCAMLYACC." 1>&2 - exit 1 - fi +ocamlyacc() { + N=0 + OCAMLYACC=$(git grep -r -o --count "ocamlyacc" '**/dune' | wc -l) + if [ "$OCAMLYACC" -eq "$N" ]; then + echo "OK found $OCAMLYACC usages of ocamlyacc usages in dune files." + else + echo "ERROR expected $N usages of ocamlyacc in dune files, got $OCAMLYACC." 1>&2 + exit 1 + fi } - -unixgetenv () { - N=0 - UNIXGETENV=$(git grep -P -r -o --count 'getenv(?!_opt)' -- **/*.ml | wc -l) - if [ "$UNIXGETENV" -eq "$N" ]; then - echo "OK found $UNIXGETENV usages of exception-raising Unix.getenv in OCaml files." 
- else - echo "ERROR expected $N usages of exception-raising Unix.getenv in OCaml files, got $UNIXGETENV" 1>&2 - exit 1 - fi +unixgetenv() { + N=0 + UNIXGETENV=$(git grep -P -r -o --count 'getenv(?!_opt)' -- **/*.ml | wc -l) + if [ "$UNIXGETENV" -eq "$N" ]; then + echo "OK found $UNIXGETENV usages of exception-raising Unix.getenv in OCaml files." + else + echo "ERROR expected $N usages of exception-raising Unix.getenv in OCaml files, got $UNIXGETENV" 1>&2 + exit 1 + fi } -hashtblfind () { - N=33 - # Looks for all .ml files except the ones using Core.Hashtbl.find, - # which already returns Option - HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) - if [ "$HASHTBLFIND" -eq "$N" ]; then - echo "OK counted $HASHTBLFIND usages of exception-raising Hashtbl.find" - else - echo "ERROR expected $N usages of exception-raising Hashtbl.find, got $HASHTBLFIND" 1>&2 - exit 1 - fi +hashtblfind() { + N=33 + # Looks for all .ml files except the ones using Core.Hashtbl.find, + # which already returns Option + HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) + if [ "$HASHTBLFIND" -eq "$N" ]; then + echo "OK counted $HASHTBLFIND usages of exception-raising Hashtbl.find" + else + echo "ERROR expected $N usages of exception-raising Hashtbl.find, got $HASHTBLFIND" 1>&2 + exit 1 + fi } -unnecessary-length () { - N=0 - local_grep () { - git grep -r -o --count "$1" -- '**/*.ml' | wc -l - } - UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*=+\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s>\s*0"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<>\s*0"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "0\s*<>\s*List.length"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "List.length.*\s<\s*1"))) - UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH+$(local_grep "1\s*>\s*List.length"))) - if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then - echo "OK found $UNNECESSARY_LENGTH unnecessary usages of List.length in OCaml files." - else - echo "ERROR expected $N unnecessary usages of List.length in OCaml files, +unnecessary-length() { + N=0 + local_grep() { + git grep -r -o --count "$1" -- '**/*.ml' | wc -l + } + UNNECESSARY_LENGTH=$(local_grep "List.length.*=+\s*0") + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*=+\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s<>\s*0"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*<\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "0\s*<>\s*List.length"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "List.length.*\s<\s*1"))) + UNNECESSARY_LENGTH=$((UNNECESSARY_LENGTH + $(local_grep "1\s*>\s*List.length"))) + if [ "$UNNECESSARY_LENGTH" -eq "$N" ]; then + echo "OK found $UNNECESSARY_LENGTH unnecessary usages of List.length in OCaml files." + else + echo "ERROR expected $N unnecessary usages of List.length in OCaml files, got $UNNECESSARY_LENGTH. Use lst =/<> [] or match statements instead." 
1>&2 - exit 1 - fi + exit 1 + fi } list-hd @@ -154,4 +153,3 @@ ocamlyacc unixgetenv hashtblfind unnecessary-length - From 66b839dcb18b6aa898f5838e9cb76cf0ae799914 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Wed, 3 Dec 2025 15:15:10 +0000 Subject: [PATCH 46/59] xapi-cli-server: Add rate limit CLI operations Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi-cli-server/cli_frontend.ml | 12 +++++++++ ocaml/xapi-cli-server/cli_operations.ml | 20 ++++++++++++++ ocaml/xapi-cli-server/records.ml | 35 +++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 39e0c8ce51f..e04fdd89f0c 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -3869,6 +3869,18 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "rate-limit-create" + , { + reqd= ["client-id"; "burst-size"; "fill-rate"] + ; optn= [] + ; help= + "Add a rate limit to a XAPI client, by specifying fill rate \ + (requests per second) and burst size (maximum number of requests at \ + once)" + ; implementation= No_fd Cli_operations.Rate_limit.create + ; flags= [] + } + ) ] let cmdtable : (string, cmd_spec) Hashtbl.t = Hashtbl.create 50 diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index eb6a0eb3a80..d6a8d0824bd 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -8179,3 +8179,23 @@ module VM_group = struct in Client.VM_group.destroy ~rpc ~session_id ~self:ref end + +module Rate_limit = struct + let create printer rpc session_id params = + let client_id = List.assoc "client-id" params in + let burst_size = float_of_string (List.assoc "burst-size" params) in + let fill_rate = float_of_string (List.assoc "fill-rate" params) in + let ref = + Client.Rate_limit.create ~rpc ~session_id ~client_id ~burst_size + ~fill_rate + in + let uuid = Client.Rate_limit.get_uuid ~rpc ~session_id ~self:ref in + printer (Cli_printer.PMsg uuid) + + let destroy _printer rpc session_id params = + let ref = + Client.Rate_limit.get_by_uuid ~rpc ~session_id + ~uuid:(List.assoc "uuid" params) + in + Client.Rate_limit.destroy ~rpc ~session_id ~self:ref +end diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index ee68f272eb8..fcf56ef28cd 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -5933,3 +5933,38 @@ let pci_record rpc session_id pci = () ] } + +let rate_limit_record rpc session_id rate_limit = + let _ref = ref rate_limit in + let empty_record = + ToGet (fun () -> Client.Rate_limit.get_record ~rpc ~session_id ~self:!_ref) + in + let record = ref empty_record in + let x () = lzy_get record in + { + setref= + (fun r -> + _ref := r ; + record := empty_record + ) + ; setrefrec= + (fun (a, b) -> + _ref := a ; + record := Got b + ) + ; record= x + ; getref= (fun () -> !_ref) + ; fields= + [ + make_field ~name:"uuid" ~get:(fun () -> (x ()).API.rate_limit_uuid) () + ; make_field ~name:"client_id" + ~get:(fun () -> (x ()).API.rate_limit_client_id) + () + ; make_field ~name:"burst_size" + ~get:(fun () -> string_of_float (x ()).API.rate_limit_burst_size) + () + ; make_field ~name:"fill_rate" + ~get:(fun () -> string_of_float (x ()).API.rate_limit_fill_rate) + () + ] + } From 7f3518c8d8106e2146006c97358cc0ccab18bade Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 2 Dec 2025 16:44:10 +0000 Subject: [PATCH 
47/59] token_bucket: replace mutex with lock-free atomics Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/token_bucket.ml | 66 ++++++++++++++------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/ocaml/libs/rate-limit/token_bucket.ml b/ocaml/libs/rate-limit/token_bucket.ml index 691752572c6..d59683e02e5 100644 --- a/ocaml/libs/rate-limit/token_bucket.ml +++ b/ocaml/libs/rate-limit/token_bucket.ml @@ -12,57 +12,61 @@ * GNU Lesser General Public License for more details. *) -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +type state = {tokens: float; last_refill: Mtime.span} -type t = { - burst_size: float - ; fill_rate: float - ; mutable tokens: float - ; mutable last_refill: Mtime.span - ; mutex: Mutex.t -} +type t = {burst_size: float; fill_rate: float; state: state Atomic.t} let create_with_timestamp timestamp ~burst_size ~fill_rate = if fill_rate <= 0. then None else - Some - { - burst_size - ; fill_rate - ; tokens= burst_size - ; last_refill= timestamp - ; mutex= Mutex.create () - } + let state = Atomic.make {tokens= burst_size; last_refill= timestamp} in + Some {burst_size; fill_rate; state} let create = create_with_timestamp (Mtime_clock.elapsed ()) -let peek_with_timestamp timestamp tb = - let time_delta = Mtime.Span.abs_diff tb.last_refill timestamp in +let compute_tokens timestamp {tokens; last_refill} ~burst_size ~fill_rate = + let time_delta = Mtime.Span.abs_diff last_refill timestamp in let time_delta_seconds = Mtime.Span.to_float_ns time_delta *. 1e-9 in - min tb.burst_size (tb.tokens +. (time_delta_seconds *. tb.fill_rate)) + min burst_size (tokens +. (time_delta_seconds *. fill_rate)) + +let peek_with_timestamp timestamp tb = + let tb_state = Atomic.get tb.state in + compute_tokens timestamp tb_state ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate let peek tb = peek_with_timestamp (Mtime_clock.elapsed ()) tb let consume_with_timestamp get_time tb amount = - let do_consume () = + let rec try_consume () = let timestamp = get_time () in - let new_tokens = peek_with_timestamp timestamp tb in - tb.last_refill <- timestamp ; - if new_tokens >= amount then ( - tb.tokens <- new_tokens -. amount ; - true - ) else ( - tb.tokens <- new_tokens ; - false - ) + let old_state = Atomic.get tb.state in + let new_tokens = + compute_tokens timestamp old_state ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate + in + let success, final_tokens = + if new_tokens >= amount then + (true, new_tokens -. amount) + else + (false, new_tokens) + in + let new_state = {tokens= final_tokens; last_refill= timestamp} in + if Atomic.compare_and_set tb.state old_state new_state then + success + else + try_consume () in - with_lock tb.mutex do_consume + try_consume () let consume = consume_with_timestamp Mtime_clock.elapsed let get_delay_until_available_timestamp timestamp tb amount = - let current_tokens = peek_with_timestamp timestamp tb in + let {tokens; last_refill} = Atomic.get tb.state in + let current_tokens = + compute_tokens timestamp {tokens; last_refill} ~burst_size:tb.burst_size + ~fill_rate:tb.fill_rate + in let required_tokens = max 0. (amount -. current_tokens) in required_tokens /. tb.fill_rate From f7d3d13f3bd36f6c944f536e9c767b8a34d32c1c Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 8 Dec 2025 14:48:40 +0000 Subject: [PATCH 48/59] xapi_rate_limit: Replace xapi_globs support with datamodel The rate limiting can no longer be set from xapi_globs. 
Instead, the rate limiter is initialised from the database on startup now. Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/xapi.ml | 8 ++++---- ocaml/xapi/xapi_globs.ml | 8 -------- ocaml/xapi/xapi_rate_limit.ml | 35 +++++++++------------------------- ocaml/xapi/xapi_rate_limit.mli | 3 ++- 4 files changed, 15 insertions(+), 39 deletions(-) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index a60d73fc883..d63844ceb59 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1115,10 +1115,6 @@ let server_init () = , fun () -> List.iter Xapi_http.add_handler master_only_http_handlers ) - ; ( "Registering rate limits" - , [Startup.OnlyMaster] - , fun () -> Xapi_rate_limit.register_xapi_globs () - ) ; ( "Listening unix socket" , [] , fun () -> listen_unix_socket Xapi_globs.unix_domain_socket @@ -1171,6 +1167,10 @@ let server_init () = , [] , fun () -> report_tls_verification ~__context ) + ; ( "Registering rate limits" + , [Startup.OnlyMaster] + , fun () -> Xapi_rate_limit.register ~__context + ) ; ( "Remote requests" , [Startup.OnThread] , Remote_requests.handle_requests diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index c49de6b5ef7..161273c83f9 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1111,8 +1111,6 @@ let python3_path = Constants.python3_path let observer_experimental_components = ref (StringSet.singleton Constants.observer_component_smapi) -let rate_limited_clients = ref ["test-rate-limit:1.0:0.1"] - let pool_recommendations_dir = ref "/etc/xapi.pool-recommendations.d" let disable_webserver = ref false @@ -1784,12 +1782,6 @@ let other_options = ) , "Comma-separated list of experimental observer components" ) - ; ( "rate-limited-clients" - , Arg.String (fun s -> rate_limited_clients := String.split_on_char ',' s) - , (fun () -> String.concat "," !rate_limited_clients) - , "Comma-separated list of rate limited clients and their configurations, \ - in format client:burst:refill" - ) ; ( "disable-webserver" , Arg.Set disable_webserver , (fun () -> string_of_bool !disable_webserver) diff --git a/ocaml/xapi/xapi_rate_limit.ml b/ocaml/xapi/xapi_rate_limit.ml index 1bc7b34a345..7a462aec316 100644 --- a/ocaml/xapi/xapi_rate_limit.ml +++ b/ocaml/xapi/xapi_rate_limit.ml @@ -56,31 +56,14 @@ let destroy ~__context ~self = ~user_agent:record.rate_limit_client_id ; Db.Rate_limit.destroy ~__context ~self -let register_xapi_globs () = - let configs = !Xapi_globs.rate_limited_clients in +let register ~__context = List.iter - (fun s -> - match String.split_on_char ':' s with - | [user_agent; burst_s; fill_s] -> ( - match (float_of_string_opt burst_s, float_of_string_opt fill_s) with - | Some burst_size, Some fill_rate -> - D.debug - "Adding user agent %s to bucket table with burst size %f and \ - fill rate %f" - user_agent burst_size fill_rate ; - if - not - (Rate_limit.Bucket_table.add_bucket bucket_table ~user_agent - ~burst_size ~fill_rate - ) - then - D.error - "Bucket creation failed for user agent %s: invalid fill rate %f" - user_agent fill_rate - | _ -> - D.debug "Skipping invalid numeric values in: %s\n" s - ) - | _ -> - D.debug "Skipping invalid item format: %s\n" s + (fun (_, bucket) -> + ignore + (Bucket_table.add_bucket bucket_table + ~fill_rate:bucket.API.rate_limit_fill_rate + ~user_agent:bucket.API.rate_limit_client_id + ~burst_size:bucket.API.rate_limit_burst_size + ) ) - configs + (Db.Rate_limit.get_all_records ~__context) diff --git a/ocaml/xapi/xapi_rate_limit.mli b/ocaml/xapi/xapi_rate_limit.mli index 
69a4bed2b86..beacea3054e 100644 --- a/ocaml/xapi/xapi_rate_limit.mli +++ b/ocaml/xapi/xapi_rate_limit.mli @@ -23,4 +23,5 @@ val create : val destroy : __context:Context.t -> self:[`Rate_limit] API.Ref.t -> unit -val register_xapi_globs : unit -> unit +val register : __context:Context.t -> unit +(** Create token buckets in the bucket table for each record in the database *) From 88a9633c0bec51e0ff8028f396e255866e41620e Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 8 Dec 2025 14:50:31 +0000 Subject: [PATCH 49/59] xapi_http: Add rate limiting to all handlers Signed-off-by: Christian Pardillo Laursen --- ocaml/idl/datamodel.ml | 4 ++++ ocaml/xapi/xapi_http.ml | 18 +++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index d39a970a49e..63eb2aed2fb 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -11144,6 +11144,10 @@ let http_actions = ; ("put_bundle", (Put, Constants.put_bundle_uri, true, [], _R_POOL_OP, [])) ] +(* Actions that incorporate the rate limiter from Xapi_rate_limiting within their handler + For now, just RPC calls *) +let custom_rate_limit_http_actions = ["post_root"; "post_RPC2"; "post_jsonrpc"] + (* these public http actions will NOT be checked by RBAC *) (* they are meant to be used in exceptional cases where RBAC is already *) (* checked inside them, such as in the XMLRPC (API) calls *) diff --git a/ocaml/xapi/xapi_http.ml b/ocaml/xapi/xapi_http.ml index 964983d8eda..c35fdbc4244 100644 --- a/ocaml/xapi/xapi_http.ml +++ b/ocaml/xapi/xapi_http.ml @@ -351,7 +351,17 @@ let add_handler (name, handler) = failwith (Printf.sprintf "Unregistered HTTP handler: %s" name) in let check_rbac = Rbac.is_rbac_enabled_for_http_action name in - let h req ic context = + let rate_limit user_agent_opt handler = + if List.mem name Datamodel.custom_rate_limit_http_actions then + match user_agent_opt with + | None -> + handler () + | Some user_agent -> + debug "Rate limiting handler %s with user_agent %s" name user_agent ; + Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table + ~user_agent ~callback:handler 1.0 + in + let h req ic () = let client = Http_svr.(client_of_req_and_fd req ic |> Option.map string_of_client) in @@ -361,15 +371,13 @@ let add_handler (name, handler) = if check_rbac then ( try (* session and rbac checks *) - assert_credentials_ok name req - ~fn:(fun () -> handler req ic context) - ic + assert_credentials_ok name req ~fn:(handler req ic) ic with e -> debug "Leaving RBAC-handler in xapi_http after: %s" (ExnHelper.string_of_exn e) ; raise e ) else (* no rbac checks *) - handler req ic context + rate_limit req.user_agent (handler req ic) with Api_errors.Server_error (name, params) as e -> error "Unhandled Api_errors.Server_error(%s, [ %s ])" name (String.concat "; " params) ; From 21728dcaf961cae1555740635cd16a139beb81d1 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Tue, 16 Dec 2025 10:56:06 +0000 Subject: [PATCH 50/59] rate-limit: Process requests on original thread Synchronous requests can be long-running, which can cause issues if they are all processed on the same worker thread. This commit updates the code to process synchronous requests on the original caller thread - the worker thread is now only responsible for signalling on a provided channel to wake up the caller. 
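Roughly, the handoff follows the sketch below (illustrative only, not part of
the change; run_on_caller_thread and enqueue are made-up names, with enqueue
standing in for adding an entry to the bucket's process queue):

  let run_on_caller_thread ~enqueue ~callback =
    let channel = Event.new_channel () in
    (* hypothetical enqueue: the worker thread runs this closure once tokens
       are available, and only sends a unit value over the channel *)
    enqueue (fun () -> Event.sync (Event.send channel ())) ;
    (* the caller blocks here, then runs the real work on its own thread *)
    Event.sync (Event.receive channel) ;
    callback ()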
Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 47 +++++++++++++++++---------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index a35bcb6ac2e..92b14a26b10 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -138,21 +138,34 @@ let submit t ~user_agent ~callback amount = if need_signal then Condition.signal worker_thread_cond ) +(* Block and execute on the same thread *) let submit_sync t ~user_agent ~callback amount = - let result = ref None in - let mutex = Mutex.create () in - let condition = Condition.create () in - let wrapped_callback () = - let r = callback () in - Mutex.lock mutex ; - result := Some r ; - Condition.signal condition ; - Mutex.unlock mutex - in - submit t ~user_agent ~callback:wrapped_callback amount ; - Mutex.lock mutex ; - while Option.is_none !result do - Condition.wait condition mutex - done ; - Mutex.unlock mutex ; - Option.get !result + let map = Atomic.get t in + match StringMap.find_opt user_agent map with + | None -> + callback () + | Some bucket_data -> ( + let channel_opt = + with_lock bucket_data.process_queue_lock (fun () -> + if + Queue.is_empty bucket_data.process_queue + && Token_bucket.consume bucket_data.bucket amount + then + None (* Can run callback immediately after releasing lock *) + else + (* Rate limited, need to retrieve function result via channel *) + let channel = Event.new_channel () in + Queue.add + (amount, fun () -> Event.sync (Event.send channel ())) + bucket_data.process_queue ; + Condition.signal bucket_data.worker_thread_cond ; + Some channel + ) + in + match channel_opt with + | None -> + callback () + | Some channel -> + Event.sync (Event.receive channel) ; + callback () + ) From a8408b08aa108af1b525e5ce5cad64a68f98b144 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 11 Dec 2025 10:37:54 +0000 Subject: [PATCH 51/59] Add logging to bucket tables Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index 92b14a26b10..c02e43efab5 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -25,6 +25,8 @@ type rate_limit_data = { module StringMap = Map.Make (String) +module D = Debug.Make (struct let name = "bucket_table" end) + type t = rate_limit_data StringMap.t Atomic.t let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute @@ -126,16 +128,23 @@ let submit t ~user_agent ~callback amount = let map = Atomic.get t in match StringMap.find_opt user_agent map with | None -> + D.debug "Found no rate limited user_agent for %s, returning" user_agent ; callback () | Some {bucket; process_queue; process_queue_lock; worker_thread_cond; _} -> with_lock process_queue_lock (fun () -> if Queue.is_empty process_queue && Token_bucket.consume bucket amount - then + then ( + D.debug + "Processing callback immediately: consumed %f tokens in call \ + from user_agent %s" + amount user_agent ; callback () - else - let need_signal = Queue.is_empty process_queue in + ) else ( + D.debug "Adding callback for %f tokens from user_agent %s to queue" + amount user_agent ; Queue.add (amount, callback) process_queue ; - if need_signal then Condition.signal worker_thread_cond + Condition.signal worker_thread_cond + ) ) (* Block and 
execute on the same thread *) From fc6df30d154b44258422c18aba964ec72d98b763 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Thu, 11 Dec 2025 16:46:38 +0000 Subject: [PATCH 52/59] xapi_http: Fix rate limiting wrapper Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/xapi_http.ml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_http.ml b/ocaml/xapi/xapi_http.ml index c35fdbc4244..31b20b171de 100644 --- a/ocaml/xapi/xapi_http.ml +++ b/ocaml/xapi/xapi_http.ml @@ -351,7 +351,7 @@ let add_handler (name, handler) = failwith (Printf.sprintf "Unregistered HTTP handler: %s" name) in let check_rbac = Rbac.is_rbac_enabled_for_http_action name in - let rate_limit user_agent_opt handler = + let rate_limit user_agent_opt handler () = if List.mem name Datamodel.custom_rate_limit_http_actions then match user_agent_opt with | None -> @@ -360,24 +360,27 @@ let add_handler (name, handler) = debug "Rate limiting handler %s with user_agent %s" name user_agent ; Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table ~user_agent ~callback:handler 1.0 + else + handler () in let h req ic () = let client = Http_svr.(client_of_req_and_fd req ic |> Option.map string_of_client) in + let rate_limited_handler = rate_limit req.user_agent (handler req ic) in Debug.with_thread_associated ?client name (fun () -> try if check_rbac then ( try (* session and rbac checks *) - assert_credentials_ok name req ~fn:(handler req ic) ic + assert_credentials_ok name req ~fn:rate_limited_handler ic with e -> debug "Leaving RBAC-handler in xapi_http after: %s" (ExnHelper.string_of_exn e) ; raise e ) else (* no rbac checks *) - rate_limit req.user_agent (handler req ic) + rate_limited_handler () with Api_errors.Server_error (name, params) as e -> error "Unhandled Api_errors.Server_error(%s, [ %s ])" name (String.concat "; " params) ; From 4b1d7a9863a8dd39cc7bf0af700a34c5d26ded2a Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 12 Dec 2025 10:18:17 +0000 Subject: [PATCH 53/59] rate-limit: Bypass rate limiting if user agent not registered Signed-off-by: Christian Pardillo Laursen --- .../libs/rate-limit/test/test_bucket_table.ml | 104 ++++++++++++++++++ ocaml/xapi/server_helpers.ml | 22 ++-- ocaml/xapi/xapi_http.ml | 4 +- 3 files changed, 120 insertions(+), 10 deletions(-) diff --git a/ocaml/libs/rate-limit/test/test_bucket_table.ml b/ocaml/libs/rate-limit/test/test_bucket_table.ml index 17da5d3cd5c..7b214b9b55a 100644 --- a/ocaml/libs/rate-limit/test/test_bucket_table.ml +++ b/ocaml/libs/rate-limit/test/test_bucket_table.ml @@ -198,6 +198,106 @@ let test_submit_sync () = Alcotest.(check bool) "blocked waiting for tokens" true (elapsed_seconds >= 0.4) +let test_submit_sync_nonexistent () = + let table = Bucket_table.create () in + let result = + Bucket_table.submit_sync table ~user_agent:"nonexistent" + ~callback:(fun () -> 99) + 1.0 + in + Alcotest.(check int) + "submit_sync on nonexistent bucket runs callback immediately" 99 result + +let test_submit_sync_with_queued_items () = + (* Test that submit_sync respects FIFO ordering when queue has items *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:5.0 + ~fill_rate:10.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 5.0 in + let execution_order = ref [] in + let order_mutex = Mutex.create () in + let record_execution id = + Mutex.lock order_mutex ; + execution_order := id :: !execution_order 
; + Mutex.unlock order_mutex + in + (* Submit async items first *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 1) + 1.0 ; + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 2) + 1.0 ; + (* Now submit_sync should queue behind the async items *) + let result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> record_execution 3 ; "sync_result") + 1.0 + in + Alcotest.(check string) + "submit_sync returns correct result" "sync_result" result ; + let order = List.rev !execution_order in + Alcotest.(check (list int)) + "submit_sync executes after queued items" [1; 2; 3] order + +let test_submit_sync_concurrent () = + (* Test multiple concurrent submit_sync calls *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:1.0 + ~fill_rate:10.0 + in + (* Drain the bucket to force queueing *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 1.0 in + let results = Array.make 5 0 in + let threads = + Array.init 5 (fun i -> + Thread.create + (fun () -> + let r = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> i + 1) + 1.0 + in + results.(i) <- r + ) + () + ) + in + Array.iter Thread.join threads ; + (* Each thread should get its own result back *) + for i = 0 to 4 do + Alcotest.(check int) + (Printf.sprintf "thread %d gets correct result" i) + (i + 1) results.(i) + done + +let test_submit_sync_interleaved () = + (* Test interleaving submit and submit_sync *) + let table = Bucket_table.create () in + let _ = + Bucket_table.add_bucket table ~user_agent:"agent1" ~burst_size:2.0 + ~fill_rate:10.0 + in + (* Drain the bucket *) + let _ = Bucket_table.try_consume table ~user_agent:"agent1" 2.0 in + let async_executed = ref false in + (* Submit async first *) + Bucket_table.submit table ~user_agent:"agent1" + ~callback:(fun () -> async_executed := true) + 1.0 ; + (* Submit sync should wait for async to complete first *) + let sync_result = + Bucket_table.submit_sync table ~user_agent:"agent1" + ~callback:(fun () -> !async_executed) + 1.0 + in + Alcotest.(check bool) + "sync callback sees async already executed" true sync_result + let test_concurrent_add_delete_stress () = (* Stress test: rapidly add and delete entries. Without proper locking, hashtable can get corrupted. *) @@ -336,6 +436,10 @@ let test = ; ("Submit nonexistent", `Quick, test_submit_nonexistent) ; ("Submit fairness", `Slow, test_submit_fairness) ; ("Submit sync", `Slow, test_submit_sync) + ; ("Submit sync interleaved", `Slow, test_submit_sync_interleaved) + ; ("Submit sync nonexistent", `Slow, test_submit_sync_nonexistent) + ; ("Submit sync concurrent", `Slow, test_submit_sync_concurrent) + ; ("Submit sync with queue", `Slow, test_submit_sync_with_queued_items) ; ("Concurrent add/delete stress", `Quick, test_concurrent_add_delete_stress) ; ("Consume during delete race", `Quick, test_consume_during_delete_race) ] diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 3601f58cbea..cb51917923f 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -200,14 +200,20 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name async ~need_complete:true in match user_agent_option with - | Some user_agent -> - D.debug - "Bucket table: Expecting to consume %f tokens from user_agent %s" - (Option.value peek_result ~default:0.) 
- user_agent ; - Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table - ~user_agent:(Option.value http_req.user_agent ~default:"") - ~callback 1. + | Some user_agent -> ( + match peek_result with + | Some tokens -> + D.debug + "Bucket table: Expecting to consume 1 token from user_agent %s \ + with available tokens %f" + user_agent tokens ; + Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table + ~user_agent:(Option.value http_req.user_agent ~default:"") + ~callback 1. + | None -> + D.debug "%s not registered, not throttling" user_agent ; + callback () + ) | None -> D.debug "Bucket table: user_agent was None, not throttling" ; callback () diff --git a/ocaml/xapi/xapi_http.ml b/ocaml/xapi/xapi_http.ml index 31b20b171de..0b5678e0623 100644 --- a/ocaml/xapi/xapi_http.ml +++ b/ocaml/xapi/xapi_http.ml @@ -352,7 +352,7 @@ let add_handler (name, handler) = in let check_rbac = Rbac.is_rbac_enabled_for_http_action name in let rate_limit user_agent_opt handler () = - if List.mem name Datamodel.custom_rate_limit_http_actions then + if List.mem name Datamodel.custom_rate_limit_http_actions then ( match user_agent_opt with | None -> handler () @@ -360,7 +360,7 @@ let add_handler (name, handler) = debug "Rate limiting handler %s with user_agent %s" name user_agent ; Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table ~user_agent ~callback:handler 1.0 - else + ) else handler () in let h req ic () = From 65f98e32cdfb735bfaf7375de51b026484e06178 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 12 Dec 2025 11:24:29 +0000 Subject: [PATCH 54/59] rate-limit: Prevent possible double locks when rate limiting Signed-off-by: Christian Pardillo Laursen --- ocaml/libs/rate-limit/bucket_table.ml | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/ocaml/libs/rate-limit/bucket_table.ml b/ocaml/libs/rate-limit/bucket_table.ml index c02e43efab5..a80af32a050 100644 --- a/ocaml/libs/rate-limit/bucket_table.ml +++ b/ocaml/libs/rate-limit/bucket_table.ml @@ -131,21 +131,18 @@ let submit t ~user_agent ~callback amount = D.debug "Found no rate limited user_agent for %s, returning" user_agent ; callback () | Some {bucket; process_queue; process_queue_lock; worker_thread_cond; _} -> - with_lock process_queue_lock (fun () -> - if Queue.is_empty process_queue && Token_bucket.consume bucket amount - then ( - D.debug - "Processing callback immediately: consumed %f tokens in call \ - from user_agent %s" - amount user_agent ; - callback () - ) else ( - D.debug "Adding callback for %f tokens from user_agent %s to queue" - amount user_agent ; - Queue.add (amount, callback) process_queue ; - Condition.signal worker_thread_cond - ) - ) + let run_immediately = + with_lock process_queue_lock (fun () -> + let immediate = + Queue.is_empty process_queue && Token_bucket.consume bucket amount + in + if not immediate then + Queue.add (amount, callback) process_queue ; + Condition.signal worker_thread_cond ; + immediate + ) + in + if run_immediately then callback () (* Block and execute on the same thread *) let submit_sync t ~user_agent ~callback amount = From 02de0f6d3e7b71c54ee81dee01071f1069384e4f Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Fri, 12 Dec 2025 12:04:10 +0000 Subject: [PATCH 55/59] xapi-http: Don't rate limit handlers in the custom rate limit list Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi/xapi_http.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi_http.ml 
b/ocaml/xapi/xapi_http.ml index 0b5678e0623..ecd644825ef 100644 --- a/ocaml/xapi/xapi_http.ml +++ b/ocaml/xapi/xapi_http.ml @@ -352,7 +352,9 @@ let add_handler (name, handler) = in let check_rbac = Rbac.is_rbac_enabled_for_http_action name in let rate_limit user_agent_opt handler () = - if List.mem name Datamodel.custom_rate_limit_http_actions then ( + if List.mem name Datamodel.custom_rate_limit_http_actions then + handler () + else match user_agent_opt with | None -> handler () @@ -360,8 +362,6 @@ let add_handler (name, handler) = debug "Rate limiting handler %s with user_agent %s" name user_agent ; Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table ~user_agent ~callback:handler 1.0 - ) else - handler () in let h req ic () = let client = From 5390470c2cb00d13dd00f59aa526359581c33467 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 15 Dec 2025 11:31:12 +0000 Subject: [PATCH 56/59] xe: Add rate limit operations Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi-cli-server/cli_operations.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index d6a8d0824bd..14ec2fcfacd 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -1378,6 +1378,11 @@ let gen_cmds rpc session_id = ["uuid"; "vendor-name"; "device-name"; "pci-id"] rpc session_id ) + ; Client.Rate_limit.( + mk get_all_records_where get_by_uuid rate_limit_record "rate-limit" [] + ["uuid"; "client-id"; "burst-size"; "fill-rate"] + rpc session_id + ) ] let message_create (_ : printer) rpc session_id params = From 7d17b88e97d3c4ce00a14837b45994fc9f8bb9be Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 15 Dec 2025 13:47:40 +0000 Subject: [PATCH 57/59] rate-limit: Fix names in records.ml Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi-cli-server/records.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index fcf56ef28cd..0a63099b55c 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -5957,13 +5957,13 @@ let rate_limit_record rpc session_id rate_limit = ; fields= [ make_field ~name:"uuid" ~get:(fun () -> (x ()).API.rate_limit_uuid) () - ; make_field ~name:"client_id" + ; make_field ~name:"client-id" ~get:(fun () -> (x ()).API.rate_limit_client_id) () - ; make_field ~name:"burst_size" + ; make_field ~name:"burst-size" ~get:(fun () -> string_of_float (x ()).API.rate_limit_burst_size) () - ; make_field ~name:"fill_rate" + ; make_field ~name:"fill-rate" ~get:(fun () -> string_of_float (x ()).API.rate_limit_fill_rate) () ] From 6f27897a1179479f96d479116888ad06d58c6882 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 15 Dec 2025 15:46:52 +0000 Subject: [PATCH 58/59] rate-limit: Return receipt immediately for async requests When an async request is rate limited, we confirm receipt immediately but enqueue the actual request, rather than rate limiting the first response too. 
Signed-off-by: Christian Pardillo Laursen --- ocaml/idl/datamodel_lifecycle.ml | 10 ++++----- ocaml/xapi/server_helpers.ml | 35 ++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 9d5f2820121..65b893cd855 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -1,6 +1,6 @@ let prototyped_of_class = function | "Rate_limit" -> - Some "25.38.0-next" + Some "25.39.0" | "Driver_variant" -> Some "25.2.0" | "Host_driver" -> @@ -16,13 +16,13 @@ let prototyped_of_class = function let prototyped_of_field = function | "Rate_limit", "fill_rate" -> - Some "25.38.0-next" + Some "25.39.0" | "Rate_limit", "burst_size" -> - Some "25.38.0-next" + Some "25.39.0" | "Rate_limit", "client_id" -> - Some "25.38.0-next" + Some "25.39.0" | "Rate_limit", "uuid" -> - Some "25.38.0-next" + Some "25.39.0" | "Driver_variant", "status" -> Some "25.2.0" | "Driver_variant", "priority" -> diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index cb51917923f..b22c02b8485 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -179,9 +179,8 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name ~marshaller op_fn ) () - ) ; + ) (* Return task id immediately *) - Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) in let user_agent_option = http_req.user_agent in let peek_result = @@ -189,34 +188,48 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name Rate_limit.Bucket_table.peek Xapi_rate_limit.bucket_table ~user_agent ) in - let callback () = + let handle_request () = match sync_ty with | `Sync -> sync () | `Async -> let need_complete = not (Context.forwarded_task __context) in - async ~need_complete + async ~need_complete ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) | `InternalAsync -> - async ~need_complete:true + async ~need_complete:true ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) in match user_agent_option with | Some user_agent -> ( match peek_result with - | Some tokens -> + | Some tokens -> ( D.debug "Bucket table: Expecting to consume 1 token from user_agent %s \ with available tokens %f" user_agent tokens ; - Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table - ~user_agent:(Option.value http_req.user_agent ~default:"") - ~callback 1. + match sync_ty with + | `Sync -> + Rate_limit.Bucket_table.submit_sync Xapi_rate_limit.bucket_table + ~user_agent ~callback:sync 1. + | `Async -> + let need_complete = not (Context.forwarded_task __context) in + Rate_limit.Bucket_table.submit Xapi_rate_limit.bucket_table + ~user_agent + ~callback:(fun () -> async ~need_complete) + 1. 
; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + | `InternalAsync -> + async ~need_complete:true ; + Rpc.success (API.rpc_of_ref_task (Context.get_task_id __context)) + ) | None -> D.debug "%s not registered, not throttling" user_agent ; - callback () + handle_request () ) | None -> D.debug "Bucket table: user_agent was None, not throttling" ; - callback () + handle_request () (* regardless of forwarding, we are expected to complete the task *) From 18c5764011987382759127e04c54d66599e64cc8 Mon Sep 17 00:00:00 2001 From: Christian Pardillo Laursen Date: Mon, 15 Dec 2025 16:39:40 +0000 Subject: [PATCH 59/59] xe: Add rate-limit-destroy operation Signed-off-by: Christian Pardillo Laursen --- ocaml/xapi-cli-server/cli_frontend.ml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index e04fdd89f0c..66679b43390 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -3881,6 +3881,15 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "rate-limit-destroy" + , { + reqd= ["uuid"] + ; optn= [] + ; help= "Destroy rate limiter" + ; implementation= No_fd Cli_operations.Rate_limit.destroy + ; flags= [] + } + ) ] let cmdtable : (string, cmd_spec) Hashtbl.t = Hashtbl.create 50
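The library side of this series can be exercised roughly as in the sketch
below (assembled from the hunks above, not an authoritative reference;
"example-client" and the numeric values are placeholders). The CLI side is
exposed through xe rate-limit-create and rate-limit-destroy, plus the listing
commands generated from rate_limit_record.

  (* Sketch only: module paths follow the test suite; inside xapi the same
     modules are reached as Rate_limit.Bucket_table. *)
  let () =
    let table = Bucket_table.create () in
    (* add_bucket returns false when the fill rate is invalid (<= 0.) *)
    let registered =
      Bucket_table.add_bucket table ~user_agent:"example-client"
        ~burst_size:10.0 ~fill_rate:2.0
    in
    assert registered ;
    (* asynchronous path: the callback runs immediately if a token is
       available, otherwise it is queued for the worker thread *)
    Bucket_table.submit table ~user_agent:"example-client"
      ~callback:(fun () -> print_endline "async work") 1.0 ;
    (* synchronous path: block this thread until a token is available,
       then run the callback here and return its result *)
    let result =
      Bucket_table.submit_sync table ~user_agent:"example-client"
        ~callback:(fun () -> 42) 1.0
    in
    Printf.printf "sync result: %d\n" result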