Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
5c5a816
rrdp-dcmi: Detect more errors on discovery
psafont Aug 6, 2025
e815c5c
CA-420968: compute the amount of physical cores available on a NUMA n…
edwintorok Nov 26, 2025
ff659cf
CA-420968: ensure compatibility between NUMARequest.fits and plan
edwintorok Nov 27, 2025
3671ba0
CA-420968: track number of physical cores during a NUMA planning request
edwintorok Nov 26, 2025
5d32507
CA-420968: introduce an explicit name for the current NUMA policy: Pr…
edwintorok Nov 26, 2025
b097854
CA-420968: avoid large performance hit on small NUMA nodes
edwintorok Nov 26, 2025
2dbbbd0
[doc] add missing command to xs-trace
gthvn1 Nov 27, 2025
2809d72
numa_placement: use Seq instead of List
psafont Jun 19, 2025
c1b1311
CP-309847: Make HTTP/80 configurable
Nov 26, 2025
62f962b
CP-309847: Make HTTP/80 configurable (#6770)
liulinC Dec 2, 2025
28eeaea
CA-420968: avoid large performance hit on small NUMA nodes (#6763)
edwintorok Dec 2, 2025
c57e943
numa_placement: use Seq instead of List (#6772)
edwintorok Dec 2, 2025
4da3c01
CP-31566 define xenopsd fast resume operation
robhoes Feb 24, 2023
88ece89
fixup! CP-31566 define xenopsd fast resume operation
lindig Dec 2, 2025
f255a46
increase max supported NVMe request size
lindig Dec 3, 2025
68a7737
increase max supported NVMe request size (#6783)
lindig Dec 4, 2025
58f09ac
CP-31566 define xenopsd fast resume operation (#6780)
lindig Dec 4, 2025
bf984c0
[doc] add missing command to xs-trace (#6771)
psafont Dec 5, 2025
f905d64
CA-420533: Only clear RestartVM guidance on up-to-date hosts
gangj Dec 3, 2025
fb2a1ab
CA-420533: Only clear RestartVM guidance on up-to-date hosts (#6782)
gangj Dec 5, 2025
bb705d1
qcow-stream-tool: Add read_headers command
last-genius Nov 18, 2025
5ec13cc
python3: Use pre-parsed cluster allocation data in qcow2-to-stdout
last-genius Nov 18, 2025
15f8088
vhd_tool_wrapper: Make vhd_of_device generic
last-genius Nov 18, 2025
3685968
qcow_tool_wrapper: Read headers of QCOW2-backed VDIs on export
last-genius Nov 18, 2025
8914076
qcow_tool_wrapper: Implement parse_header to determine allocated clus…
last-genius Nov 26, 2025
3387a72
qcow: Only process allocated clusters on export from raw (#6769)
last-genius Dec 8, 2025
2cc325b
opam: add missing dependencies
psafont Dec 10, 2025
8a1e83b
opam: add missing dependencies (#6788)
edwintorok Dec 10, 2025
e496d08
rrdp-dcmi: Detect more errors on discovery (#6746)
psafont Dec 10, 2025
949f1dc
CA-420856: Re-read inventory file when resetting network
minglumlu Dec 11, 2025
f66d4b6
CA-420856: Re-read inventory file when resetting network (#6789)
minglumlu Dec 11, 2025
7c6ddfe
opam: generate metadata for uuid with dune
psafont Dec 11, 2025
a9978f1
opam: generate metadata for uuid with dune (#6790)
psafont Dec 11, 2025
a1ff026
libs: remove unused type parameters
psafont Dec 11, 2025
8e21f45
git-blame: ignore another formatting commit
psafont Dec 11, 2025
cd50d44
sdk-gen: make code compatible with ocaml 5.4
psafont Dec 11, 2025
862a44b
ocaml: prepare for ocaml 5.4 (#6791)
last-genius Dec 12, 2025
2686b01
CA-421991: Fix QEMU coredumps on XS9
rosslagerwall Dec 12, 2025
654cef3
CA-421991: Fix QEMU coredumps on XS9 (#6793)
robhoes Dec 12, 2025
2e46250
CA-421914: preserve Host.numa_affinity_policy across pool join
edwintorok Dec 15, 2025
fb66dfc
CA-421847: set vcpu affinity if node claim succeeded
mg12 Dec 10, 2025
994a8dc
CA-421914: preserve Host.numa_affinity_policy across pool join (#6796)
edwintorok Dec 15, 2025
06c1d62
CA-422071: preserve latest_synced_updates_applied and pending_guidanc…
edwintorok Dec 15, 2025
bd8c79a
CA-422071: add unit test for Host.create_params
edwintorok Dec 15, 2025
b0eaef3
CA-421847: set vcpu affinity if node claim succeeded (#6794)
edwintorok Dec 15, 2025
5b121e2
CA-422071: guard against losing Host field settings on pool join (#6799)
robhoes Dec 16, 2025
f8ecfbd
libs: Add token-bucket library
cplaursen Nov 11, 2025
e3bd061
rate-limit: Test token bucket
cplaursen Nov 27, 2025
3930feb
rate-limit: Implement bucket tables
cplaursen Nov 27, 2025
a3c275f
rate-limit: Create bucket table from xapi globs
cplaursen Nov 28, 2025
85d7bc1
xapi: Add rate limiting to do_dispatch
cplaursen Nov 28, 2025
259816d
xapi rate limiting: Add logging
cplaursen Dec 1, 2025
a96c26e
rate_limit: Add rate limiter to xapi initialisation
cplaursen Dec 1, 2025
06d4d7d
Rate limiting: Improve token_bucket documentation
cplaursen Dec 1, 2025
957307e
Rate limiting: token buckets with zero or negative fill rate fail
cplaursen Dec 1, 2025
070f47b
rate-limit: Write unit tests for bucket table
cplaursen Dec 2, 2025
276bc37
rate-limit: Minor fixes to bucket table
cplaursen Dec 2, 2025
0d6fab9
rate-limit: Add readers-writer lock to bucket table
cplaursen Dec 4, 2025
3e63f2c
rate-limit: Handle rate limited requests in FIFO queue
cplaursen Dec 4, 2025
b6b0173
rate-limit: Replace readers-writer lock with atomic Map
cplaursen Dec 5, 2025
369f0d2
rate-limit: Clarify token bucket creation docs
cplaursen Dec 5, 2025
028b505
idl: Add Rate_limit datamodel
cplaursen Dec 2, 2025
66b839d
xapi-cli-server: Add rate limit CLI operations
cplaursen Dec 3, 2025
7f3518c
token_bucket: replace mutex with lock-free atomics
cplaursen Dec 2, 2025
f7d3d13
xapi_rate_limit: Replace xapi_globs support with datamodel
cplaursen Dec 8, 2025
88a9633
xapi_http: Add rate limiting to all handlers
cplaursen Dec 8, 2025
21728dc
rate-limit: Process requests on original thread
cplaursen Dec 16, 2025
a8408b0
Add logging to bucket tables
cplaursen Dec 11, 2025
fc6df30
xapi_http: Fix rate limiting wrapper
cplaursen Dec 11, 2025
4b1d7a9
rate-limit: Bypass rate limiting if user agent not registered
cplaursen Dec 12, 2025
65f98e3
rate-limit: Prevent possible double locks when rate limiting
cplaursen Dec 12, 2025
02de0f6
xapi-http: Don't rate limit handlers in the custom rate limit list
cplaursen Dec 12, 2025
5390470
xe: Add rate limit operations
cplaursen Dec 15, 2025
7d17b88
rate-limit: Fix names in records.ml
cplaursen Dec 15, 2025
6f27897
rate-limit: Return receipt immediately for async requests
cplaursen Dec 15, 2025
18c5764
xe: Add rate-limit-destroy operation
cplaursen Dec 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ b12cf444edea15da6274975e1b2ca6a7fce2a090
364c27f5d18ab9dd31825e67a93efabecad06823
d8b4de9076531dd13bdffa20cc10c72290a52356
bdf06bca7534fbc0c4fc3cee3408a51a22615226
eefc649e17086fbc200e4da114ea673825e79864

# ocp-indent
d018d26d6acd4707a23288b327b49e44f732725e
Expand Down
7 changes: 5 additions & 2 deletions doc/content/toolstack/features/Tracing/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,17 @@ and also assist newcomers in onboarding to the project.

By default, traces are generated locally in the `/var/log/dt` directory. You can copy or forward
these traces to another location or endpoint using the `xs-trace` tool. For example, if you have
a *Jaeger* server running locally, you can run:
a *Jaeger* server running locally, you can copy a trace to an endpoint by running:

```sh
xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans
xs-trace cp /var/log/dt/ http://127.0.0.1:9411/api/v2/spans
```

You will then be able to visualize the traces in Jaeger.

The `xs-trace` tool also supports trace files in `.ndjson` and compressed `.zst` formats, so
you can copy or forward these files directly as well.

### Tagging Trace Sessions for Easier Search

#### Specific attributes
Expand Down
20 changes: 19 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@
(name tgroup)
(depends xapi-log xapi-stdext-unix))

(package
(name rate-limit)
(synopsis "Simple token bucket-based rate-limiting")
(depends
(ocaml (>= 4.12))
xapi-log xapi-stdext-unix))

(package
(name xml-light2))

Expand Down Expand Up @@ -586,14 +593,25 @@
(depends
qcow-stream
cmdliner
yojson
)
)

(package
(name varstored-guard))

(package
(name uuid))
(name uuid)
(synopsis "Library used by xapi to generate database UUIDs")
(description
"This library allows xapi to use UUIDs with phantom types to avoid mixing UUIDs from different classes of objects. It's based on `uuidm`.")
(depends
(alcotest :with-test)
(fmt :with-test)
ptime
uuidm
)
)

(package
(name stunnel)
Expand Down
6 changes: 6 additions & 0 deletions ocaml/idl/datamodel.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10535,6 +10535,7 @@ let all_system =
; Datamodel_vm_group.t
; Datamodel_host_driver.t
; Datamodel_driver_variant.t
; Datamodel_rate_limit.t
]

(* If the relation is one-to-many, the "many" nodes (one edge each) must come before the "one" node (many edges) *)
Expand Down Expand Up @@ -10786,6 +10787,7 @@ let expose_get_all_messages_for =
; _observer
; _host_driver
; _driver_variant
; _rate_limit
]

let no_task_id_for = [_task; (* _alert; *) _event]
Expand Down Expand Up @@ -11142,6 +11144,10 @@ let http_actions =
; ("put_bundle", (Put, Constants.put_bundle_uri, true, [], _R_POOL_OP, []))
]

(* Names of the HTTP actions whose handlers incorporate the rate limiter from
   Xapi_rate_limiting themselves. For now these are just the RPC calls.
   NOTE(review): confirm the module name referenced here is still accurate. *)
let custom_rate_limit_http_actions = ["post_root"; "post_RPC2"; "post_jsonrpc"]

(* these public http actions will NOT be checked by RBAC *)
(* they are meant to be used in exceptional cases where RBAC is already *)
(* checked inside them, such as in the XMLRPC (API) calls *)
Expand Down
2 changes: 2 additions & 0 deletions ocaml/idl/datamodel_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ let _host_driver = "Host_driver"

let _driver_variant = "Driver_variant"

(* Datamodel class name of the rate-limiting object *)
let _rate_limit = "Rate_limit"

let update_guidances =
Enum
( "update_guidances"
Expand Down
116 changes: 81 additions & 35 deletions ocaml/idl/datamodel_host.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,41 @@ let license_remove =
to the unlicensed edition"
~allowed_roles:_R_POOL_OP ()

(* Enum describing the NUMA affinity policy of a host. It is the type of the
   "numa_affinity_policy" entry in [create_params]. *)
let host_numa_affinity_policy =
  let any = ("any", "VMs are spread across all available NUMA nodes") in
  let best_effort =
    ( "best_effort"
    , "VMs are placed on the smallest number of NUMA nodes that they fit \
       using soft-pinning, but the policy doesn't guarantee a balanced \
       placement, falling back to the 'any' policy."
    )
  in
  let default_policy =
    ( "default_policy"
    , "Use the NUMA affinity policy that is the default for the current \
       version"
    )
  in
  Enum ("host_numa_affinity_policy", [any; best_effort; default_policy])

(* Enum with the three possible answers to "has this host applied the latest
   updates synced from the remote CDN?". It is the type of the
   "latest_synced_updates_applied" entry in [create_params]. *)
let latest_synced_updates_applied_state =
  let yes =
    ( "yes"
    , "The host is up to date with the latest updates synced from remote \
       CDN"
    )
  in
  let no =
    ( "no"
    , "The host is outdated with the latest updates synced from remote CDN"
    )
  in
  let unknown =
    ( "unknown"
    , "If the host is up to date with the latest updates synced from \
       remote CDN is unknown"
    )
  in
  Enum ("latest_synced_updates_applied_state", [yes; no; unknown])

let create_params =
[
{
Expand Down Expand Up @@ -1398,6 +1433,51 @@ let create_params =
; param_release= numbered_release "25.32.0-next"
; param_default= Some (VMap [])
}
; {
param_type= Bool
; param_name= "https_only"
; param_doc=
"updates firewall to open or close port 80 depending on the value"
; param_release= numbered_release "25.38.0-next"
; param_default= Some (VBool false)
}
; {
param_type= host_numa_affinity_policy
; param_name= "numa_affinity_policy"
; param_doc= "NUMA-aware VM memory and vCPU placement policy"
; param_release= numbered_release "25.39.0-next"
; param_default= Some (VEnum "default_policy")
}
; {
    (* State of the host with respect to the updates synced from the remote
       CDN; a single value of the enum, not a set. *)
    param_type= latest_synced_updates_applied_state
  ; param_name= "latest_synced_updates_applied"
  ; param_doc=
      "Default as 'unknown', 'yes' if the host is up to date with updates \
       synced from remote CDN, otherwise 'no'"
  ; param_release= numbered_release "25.39.0-next"
    (* The default must be a constant of the enum above: the doc string
       promises 'unknown', and a set value does not match the parameter
       type. *)
  ; param_default= Some (VEnum "unknown")
  }
; {
param_type= Set update_guidances
; param_name= "pending_guidances_full"
; param_doc=
"The set of pending full guidances after applying updates, which a \
user should follow to make some updates, e.g. specific hardware \
drivers or CPU features, fully effective, but the 'average user' \
doesn't need to"
; param_release= numbered_release "25.39.0-next"
; param_default= Some (VSet [])
}
; {
param_type= Set update_guidances
; param_name= "pending_guidances_recommended"
; param_doc=
"The set of pending recommended guidances after applying updates, \
which most users should follow to make the updates effective, but if \
not followed, will not cause a failure"
; param_release= numbered_release "25.39.0-next"
; param_default= Some (VSet [])
}
]

let create =
Expand All @@ -1416,6 +1496,7 @@ let create =
--console_idle_timeout --ssh_auto_mode options to allow them to be \
configured for new host"
)
; (Changed, "25.38.0-next", "Added --https_only to disable http")
]
~versioned_params:create_params ~doc:"Create a new host record"
~result:(Ref _host, "Reference to the newly created host object.")
Expand Down Expand Up @@ -2302,23 +2383,6 @@ let cleanup_pool_secret =
]
~allowed_roles:_R_LOCAL_ROOT_ONLY ~hide_from_docs:true ()

let host_numa_affinity_policy =
Enum
( "host_numa_affinity_policy"
, [
("any", "VMs are spread across all available NUMA nodes")
; ( "best_effort"
, "VMs are placed on the smallest number of NUMA nodes that they fit \
using soft-pinning, but the policy doesn't guarantee a balanced \
placement, falling back to the 'any' policy."
)
; ( "default_policy"
, "Use the NUMA affinity policy that is the default for the current \
version"
)
]
)

let set_numa_affinity_policy =
call ~name:"set_numa_affinity_policy" ~lifecycle:[]
~doc:"Set VM placement NUMA affinity policy"
Expand Down Expand Up @@ -2526,24 +2590,6 @@ let update_firewalld_service_status =
status."
~allowed_roles:_R_POOL_OP ()

let latest_synced_updates_applied_state =
Enum
( "latest_synced_updates_applied_state"
, [
( "yes"
, "The host is up to date with the latest updates synced from remote \
CDN"
)
; ( "no"
, "The host is outdated with the latest updates synced from remote CDN"
)
; ( "unknown"
, "If the host is up to date with the latest updates synced from \
remote CDN is unknown"
)
]
)

let get_tracked_user_agents =
call ~name:"get_tracked_user_agents" ~lifecycle:[]
~doc:
Expand Down
10 changes: 10 additions & 0 deletions ocaml/idl/datamodel_lifecycle.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
let prototyped_of_class = function
| "Rate_limit" ->
Some "25.39.0"
| "Driver_variant" ->
Some "25.2.0"
| "Host_driver" ->
Expand All @@ -13,6 +15,14 @@ let prototyped_of_class = function
None

let prototyped_of_field = function
| "Rate_limit", "fill_rate" ->
Some "25.39.0"
| "Rate_limit", "burst_size" ->
Some "25.39.0"
| "Rate_limit", "client_id" ->
Some "25.39.0"
| "Rate_limit", "uuid" ->
Some "25.39.0"
| "Driver_variant", "status" ->
Some "25.2.0"
| "Driver_variant", "priority" ->
Expand Down
40 changes: 40 additions & 0 deletions ocaml/idl/datamodel_rate_limit.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
(*
* Copyright (C) 2023 Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

open Datamodel_types
open Datamodel_common
open Datamodel_roles

(* Lifecycle value passed as [~lifecycle] to the fields declared below; it is
   the empty list here. *)
let lifecycle = []

let t =
create_obj ~name:_rate_limit ~descr:"Rate limiting policy for a XAPI client"
~doccomments:[] ~gen_constructor_destructor:true ~gen_events:true
~in_db:true ~lifecycle:[] ~persist:PersistEverything ~in_oss_since:None
~messages_default_allowed_roles:_R_POOL_ADMIN
~contents:
([uid _rate_limit ~lifecycle]
@ [
field ~qualifier:StaticRO ~ty:String ~lifecycle "client_id"
"An identifier for the rate limited client" ~ignore_foreign_key:true
~default_value:(Some (VString ""))
; field ~qualifier:StaticRO ~ty:Float ~lifecycle "burst_size"
"Amount of tokens that can be consumed in one burst"
Copy link
Member

@psafont psafont Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the idl should mention tokens or buckets at all, instead I would try to communicate the meaning of the parameters in a way that allows users to make a mental model of how rate limiting works:

Suggested change
"Amount of tokens that can be consumed in one burst"
"Amount of RPC calls that the client can do in burst"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, we shouldn't talk about token buckets and I'll change that. The plan is to assign higher token costs to more expensive calls, e.g. VM create, so we can't simplify to the level of RPC calls, but I'll figure out how to document this for users.

~ignore_foreign_key:true ~default_value:(Some (VFloat 0.))
; field ~qualifier:StaticRO ~ty:Float ~lifecycle "fill_rate"
"Tokens added to token bucket per second" ~ignore_foreign_key:true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Tokens added to token bucket per second" ~ignore_foreign_key:true
"Calls per second afforded to the client" ~ignore_foreign_key:true

~default_value:(Some (VFloat 0.))
]
)
~messages:[] ()
6 changes: 3 additions & 3 deletions ocaml/idl/dune
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
datamodel_values datamodel_schema datamodel_certificate
datamodel_diagnostics datamodel_repository datamodel_lifecycle
datamodel_vtpm datamodel_observer datamodel_vm_group api_version
datamodel_host_driver datamodel_driver_variant)
datamodel_host_driver datamodel_driver_variant datamodel_rate_limit)
(libraries
rpclib.core
sexplib0
Expand Down Expand Up @@ -64,9 +64,9 @@
)

(tests
(names schematest test_datetimes)
(names schematest test_datetimes test_host)
(modes exe)
(modules schematest test_datetimes)
(modules schematest test_datetimes test_host)
(libraries
astring
rpclib.core
Expand Down
2 changes: 1 addition & 1 deletion ocaml/idl/schematest.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex
(* BEWARE: if this changes, check that schema has been bumped accordingly in
ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *)

let last_known_schema_hash = "3b20f4304cfaaa7b6213af91ae632e64"
let last_known_schema_hash = "4708cb1f0cf7c1231c6958590ee1ed04"

let current_schema_hash : string =
let open Datamodel_types in
Expand Down
31 changes: 31 additions & 0 deletions ocaml/idl/test_host.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module DT = Datamodel_types
module FieldSet = Astring.String.Set

(* A field is considered "recent" when its lifecycle records no transitions
   at all. *)
let recent_field (f : DT.field) =
  match f.lifecycle.transitions with [] -> true | _ :: _ -> false

(* [field_full_names content] walks a datamodel content tree and yields, for
   every field accepted by [recent_field], the components of its full name
   joined with "_". Namespaces are traversed recursively; fields that are not
   recent yield nothing. *)
let rec field_full_names content =
  match content with
  | DT.Namespace (_, children) ->
      Seq.concat_map field_full_names (List.to_seq children)
  | DT.Field fld ->
      if not (recent_field fld) then
        Seq.empty
      else
        Seq.return (String.concat "_" fld.full_name)

(* Test driver: every recent field of the host class (as selected by
   [field_full_names]) must have an entry of the same name in
   [Datamodel_host.create_params]. If some are missing, their names are
   printed on stderr and the process exits with status 1. *)
let () =
  let param_names =
    FieldSet.of_list
      (List.map
         (fun {DT.param_name; _} -> param_name)
         Datamodel_host.create_params
      )
  in
  let recent_fields =
    FieldSet.of_seq
      (Seq.concat_map field_full_names (List.to_seq Datamodel_host.t.contents))
  in
  let absent = FieldSet.diff recent_fields param_names in
  if FieldSet.is_empty absent then
    ()
  else (
    Format.eprintf "Missing fields in create_params: %a@." FieldSet.dump absent ;
    exit 1
  )
Empty file added ocaml/idl/test_host.mli
Empty file.
Loading
Loading