From e5d034e954782361bad363ed2a6b9be4bc34e325 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Tue, 16 Dec 2025 13:58:58 +0100 Subject: [PATCH 1/8] Probe scripts --- README.md | 4 +- main.go | 45 ++++++++++---- playground/artifacts.go | 4 ++ playground/components.go | 29 +++------ playground/components_test.go | 2 - playground/local_runner.go | 114 ++++++++++++++++++++++++++++++++++ playground/manifest.go | 25 ++------ playground/utils/el_watch.sh | 114 ++++++++++++++++++++++++++++++++++ playground/watchdog.go | 42 ------------- 9 files changed, 278 insertions(+), 101 deletions(-) create mode 100755 playground/utils/el_watch.sh delete mode 100644 playground/watchdog.go diff --git a/README.md b/README.md index f21ccf9..0e974fc 100644 --- a/README.md +++ b/README.md @@ -131,14 +131,14 @@ To stop the playground, press `Ctrl+C`. Builder-playground supports inspecting the connection of a service to a specific port. ```bash -$ builder-playground inspect +$ builder-playground debug inspect ``` Example: ```bash $ builder-playground cook opstack -$ builder-playground inspect op-geth authrpc +$ builder-playground debug inspect op-geth authrpc ``` This command starts a `tcpflow` container in the same network interface as the service and captures the traffic to the specified port. diff --git a/main.go b/main.go index 41c0cbc..2488efe 100644 --- a/main.go +++ b/main.go @@ -95,6 +95,27 @@ var inspectCmd = &cobra.Command{ }, } +var debugCmd = &cobra.Command{ + Use: "debug", +} + +var probeCmd = &cobra.Command{ + Use: "probe", + RunE: func(cmd *cobra.Command, args []string) error { + serviceName := args[0] + + resp, err := playground.ExecuteHealthCheckManually(serviceName) + if err != nil { + return err + } + + fmt.Printf("Exit code: %d\n", resp.ExitCode) + fmt.Printf("Output: %s\n", resp.Output) + + return nil + }, +} + var versionCmd = &cobra.Command{ Use: "version", Short: "Print the version", @@ -147,12 +168,15 @@ func main() { } rootCmd.AddCommand(cookCmd) - rootCmd.AddCommand(inspectCmd) rootCmd.AddCommand(versionCmd) rootCmd.AddCommand(cleanCmd) cleanCmd.Flags().StringVar(&outputFlag, "output", "", "Output folder for the artifacts") + debugCmd.AddCommand(probeCmd) + debugCmd.AddCommand(inspectCmd) + rootCmd.AddCommand(debugCmd) + if err := rootCmd.Execute(); err != nil { fmt.Println(err) os.Exit(1) @@ -291,15 +315,6 @@ func runIt(recipe playground.Recipe) error { return fmt.Errorf("failed to wait for service readiness: %w", err) } - fmt.Printf("\nWaiting for network to be ready for transactions...\n") - networkReadyStart := time.Now() - if err := playground.CompleteReady(ctx, svcManager.Services); err != nil { - dockerRunner.Stop() - return fmt.Errorf("network not ready: %w", err) - } - fmt.Printf("Network is ready for transactions (took %.1fs)\n", time.Since(networkReadyStart).Seconds()) - fmt.Println("Session ID:", svcManager.ID) - // get the output from the recipe output := recipe.Output(svcManager) if len(output) > 0 { @@ -316,9 +331,13 @@ func runIt(recipe playground.Recipe) error { watchdogErr := make(chan error, 1) if watchdog { go func() { - if err := playground.RunWatchdog(artifacts.Out, svcManager.Services); err != nil { - watchdogErr <- fmt.Errorf("watchdog failed: %w", err) - } + // TODO: Just wait for one of the services to fail health check and stop + panic("TODO") + /* + if err := playground.RunWatchdog(artifacts.Out, svcManager.Services); err != nil { + watchdogErr <- fmt.Errorf("watchdog failed: %w", err) + } + */ }() } diff --git a/playground/artifacts.go b/playground/artifacts.go index 98f1358..e707aa9 100644 --- a/playground/artifacts.go +++ b/playground/artifacts.go @@ -61,6 +61,9 @@ var clConfigContent []byte //go:embed utils/query.sh var queryReadyCheck []byte +//go:embed utils/el_watch.sh +var elWatchScript []byte + type ArtifactsBuilder struct { outputDir string applyLatestL1Fork bool @@ -253,6 +256,7 @@ func (b *ArtifactsBuilder) Build() (*Artifacts, error) { "testnet/genesis_validators_root.txt": hex.EncodeToString(state.GenesisValidatorsRoot()), "data_validator/": &lighthouseKeystore{privKeys: priv}, "scripts/query.sh": queryReadyCheck, + "scripts/el_watch.sh": elWatchScript, }) if err != nil { return nil, err diff --git a/playground/components.go b/playground/components.go index 728c13b..33c3fcf 100644 --- a/playground/components.go +++ b/playground/components.go @@ -28,7 +28,7 @@ func (r *RollupBoost) Apply(manifest *Manifest) { service := manifest.NewService("rollup-boost"). WithImage("docker.io/flashbots/rollup-boost"). WithTag("v0.7.5"). - DependsOnHealthy(r.ELNode). + DependsOnHealthy(r.ELNode). WithArgs( "--rpc-host", "0.0.0.0", "--rpc-port", `{{Port "authrpc" 8551}}`, @@ -229,9 +229,9 @@ type ChainMonitor struct { func (c *ChainMonitor) Apply(manifest *Manifest) { manifest.NewService("chain-monitor"). - WithPort("metrics", 8080). - WithImage("ghcr.io/flashbots/chain-monitor"). - WithTag("v0.0.54"). + WithPort("metrics", 8080). + WithImage("ghcr.io/flashbots/chain-monitor"). + WithTag("v0.0.54"). DependsOnHealthy(c.L1RPC). DependsOnHealthy(c.L2RPC). WithArgs( @@ -383,13 +383,11 @@ func (o *OpGeth) Apply(manifest *Manifest) { "--metrics.port "+`{{Port "metrics" 6061}}`, ). WithVolume("data", "/data_opgeth"). - WithWatchdog(opGethWatchdogFn). - WithReadyFn(opGethReadyFn). WithArtifact("/data/l2-genesis.json", "l2-genesis.json"). WithArtifact("/data/jwtsecret", "jwtsecret"). WithArtifact("/data/p2p_key.txt", o.Enode.Artifact). WithReady(ReadyCheck{ - QueryURL: "http://localhost:8545", + Test: ElWatch("http://localhost:8545", 2*time.Second), Interval: 1 * time.Second, Timeout: 10 * time.Second, Retries: 20, @@ -480,19 +478,11 @@ func (r *RethEL) Apply(manifest *Manifest) { logLevelToRethVerbosity(manifest.ctx.LogLevel), ). WithRelease(rethELRelease). - WithWatchdog(func(out io.Writer, service *Service, ctx context.Context) error { - rethURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - return watchChainHead(out, rethURL, 12*time.Second) - }). - WithReadyFn(func(ctx context.Context, service *Service) error { - elURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - return waitForFirstBlock(ctx, elURL, 60*time.Second) - }). WithArtifact("/data/genesis.json", "genesis.json"). WithArtifact("/data/jwtsecret", "jwtsecret"). WithVolume("data", "/data_reth"). WithReady(ReadyCheck{ - QueryURL: "http://localhost:8545", + Test: ElWatch("http://localhost:8545", 12*time.Second), Interval: 1 * time.Second, Timeout: 10 * time.Second, Retries: 20, @@ -615,7 +605,6 @@ func (m *MevBoostRelay) Apply(manifest *Manifest) { WithEnv("ALLOW_SYNCING_BEACON_NODE", "1"). WithEntrypoint("mev-boost-relay"). DependsOnHealthy(m.BeaconClient). - WithWatchdog(mevboostRelayWatchdogFn). WithArgs( "--api-listen-addr", "0.0.0.0", "--api-listen-port", `{{Port "http" 5555}}`, @@ -681,15 +670,11 @@ func (o *OpReth) Apply(manifest *Manifest) { "--addr", "0.0.0.0", "--port", `{{Port "rpc" 30303}}`). WithRelease(opRethRelease). - WithWatchdog(func(out io.Writer, service *Service, ctx context.Context) error { - rethURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - return watchChainHead(out, rethURL, 2*time.Second) - }). WithArtifact("/data/jwtsecret", "jwtsecret"). WithArtifact("/data/l2-genesis.json", "l2-genesis.json"). WithVolume("data", "/data_op_reth"). WithReady(ReadyCheck{ - QueryURL: "http://localhost:8545", + Test: ElWatch("http://localhost:8545", 2*time.Second), Interval: 1 * time.Second, Timeout: 10 * time.Second, Retries: 20, diff --git a/playground/components_test.go b/playground/components_test.go index 8c114f6..f44b2e7 100644 --- a/playground/components_test.go +++ b/playground/components_test.go @@ -143,8 +143,6 @@ func (tt *testFramework) test(s ServiceGen, args []string) *Manifest { require.NoError(t, err) require.NoError(t, dockerRunner.WaitForReady(context.Background(), 20*time.Second)) - require.NoError(t, CompleteReady(context.Background(), svcManager.Services)) - return svcManager } diff --git a/playground/local_runner.go b/playground/local_runner.go index 0a72c6f..2cfb44f 100644 --- a/playground/local_runner.go +++ b/playground/local_runner.go @@ -755,6 +755,9 @@ func (d *LocalRunner) trackContainerStatusAndLogs() { case events.ActionHealthStatusHealthy: d.updateTaskStatus(name, TaskStatusHealthy) log.Info("container is healthy", "name", name) + + case events.ActionHealthStatusUnhealthy: + // TODO } case err := <-errCh: @@ -962,3 +965,114 @@ func StopContainersBySessionID(id string) error { return g.Wait() } + +type HealthCheckResponse struct { + Output string + ExitCode int +} + +func ExecuteHealthCheckManually(serviceName string) (*HealthCheckResponse, error) { + ctx := context.Background() + + cli, err := newDockerClient() + if err != nil { + return nil, fmt.Errorf("failed to create docker client: %w", err) + } + defer cli.Close() + + containerID, err := findServiceByName(cli, ctx, serviceName) + if err != nil { + return nil, err + } + + // Get the container to find the health check command + containerJSON, err := cli.ContainerInspect(ctx, containerID) + if err != nil { + return nil, fmt.Errorf("failed to inspect container: %w", err) + } + + if containerJSON.Config.Healthcheck == nil { + return nil, fmt.Errorf("container has no health check configured") + } + + healthCheckCmd := containerJSON.Config.Healthcheck.Test + + // Health check commands are usually in format: ["CMD-SHELL", "actual command"] + // or ["CMD", "arg1", "arg2", ...] + var execCmd []string + + if len(healthCheckCmd) == 0 { + return nil, fmt.Errorf("health check command is empty") + } + + if healthCheckCmd[0] == "CMD-SHELL" { + // Use sh -c to execute the shell command + if len(healthCheckCmd) > 1 { + execCmd = []string{"sh", "-c", healthCheckCmd[1]} + } else { + return nil, fmt.Errorf("CMD-SHELL specified but no command provided") + } + } else if healthCheckCmd[0] == "CMD" { + // Direct command execution + execCmd = healthCheckCmd[1:] + } else { + // Assume it's a direct command + execCmd = healthCheckCmd + } + + // Create exec instance + execConfig := container.ExecOptions{ + Cmd: execCmd, + AttachStdout: true, + AttachStderr: true, + } + + execID, err := cli.ContainerExecCreate(ctx, containerID, execConfig) + if err != nil { + return nil, fmt.Errorf("failed to create exec: %w", err) + } + + // Start the exec and get output + resp, err := cli.ContainerExecAttach(ctx, execID.ID, container.ExecStartOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to attach to exec: %w", err) + } + defer resp.Close() + + // Read all output + var outBuf bytes.Buffer + _, err = io.Copy(&outBuf, resp.Reader) + if err != nil && err != io.EOF { + return nil, fmt.Errorf("error reading output: %w", err) + } + + // Get exit code + inspectResp, err := cli.ContainerExecInspect(ctx, execID.ID) + if err != nil { + return nil, fmt.Errorf("failed to inspect exec: %w", err) + } + + healthCheckResp := &HealthCheckResponse{ + Output: strings.TrimSpace(outBuf.String()), + ExitCode: inspectResp.ExitCode, + } + return healthCheckResp, nil +} + +func findServiceByName(client *client.Client, ctx context.Context, serviceName string) (string, error) { + containers, err := client.ContainerList(ctx, container.ListOptions{ + All: true, + }) + if err != nil { + return "", fmt.Errorf("error getting container list: %w", err) + } + + for _, container := range containers { + if container.Labels["playground"] == "true" && + container.Labels["com.docker.compose.service"] == serviceName { + return container.ID, nil + } + } + + return "", nil +} diff --git a/playground/manifest.go b/playground/manifest.go index b3417de..0602544 100644 --- a/playground/manifest.go +++ b/playground/manifest.go @@ -1,10 +1,8 @@ package playground import ( - "context" "encoding/json" "fmt" - "io" "os" "path/filepath" "strings" @@ -321,16 +319,9 @@ type Service struct { Entrypoint string `json:"entrypoint,omitempty"` HostPath string `json:"host_path,omitempty"` - release *release - watchdogFn watchdogFn - readyFn readyFn + release *release } -type ( - watchdogFn func(out io.Writer, service *Service, ctx context.Context) error - readyFn func(ctx context.Context, service *Service) error -) - type DependsOnCondition string const ( @@ -439,16 +430,6 @@ func (s *Service) WithRelease(rel *release) *Service { return s } -func (s *Service) WithWatchdog(watchdogFn watchdogFn) *Service { - s.watchdogFn = watchdogFn - return s -} - -func (s *Service) WithReadyFn(readyFn readyFn) *Service { - s.readyFn = readyFn - return s -} - func (s *Service) applyTemplate(arg string) { var port []Port var nodeRef []NodeRef @@ -490,6 +471,10 @@ func (s *Service) WithReady(check ReadyCheck) *Service { return s } +func ElWatch(endpoint string, blockTime time.Duration) []string { + return []string{"CMD-SHELL", fmt.Sprintf("chmod +x /artifacts/scripts/el_watch.sh && /artifacts/scripts/el_watch.sh %s %d", endpoint, blockTime.Seconds())} +} + type ReadyCheck struct { QueryURL string `json:"query_url"` Test []string `json:"test"` diff --git a/playground/utils/el_watch.sh b/playground/utils/el_watch.sh new file mode 100755 index 0000000..b2005b9 --- /dev/null +++ b/playground/utils/el_watch.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +# Function to install dependencies +install_dependencies() { + local missing_deps="$1" + + if [ -f "/etc/alpine-release" ]; then + echo "Installing $missing_deps on Alpine..." + apk add --no-cache $missing_deps >/dev/null 2>&1 || return 1 + elif [ -f "/etc/debian_version" ]; then + echo "Installing $missing_deps on Debian/Ubuntu..." + apt-get update >/dev/null 2>&1 && apt-get install -y $missing_deps >/dev/null 2>&1 || return 1 + elif [ -f "/etc/redhat-release" ]; then + echo "Installing $missing_deps on RHEL/CentOS..." + yum install -y $missing_deps >/dev/null 2>&1 || return 1 + else + echo "ERROR: No package manager found, cannot install $missing_deps" >&2 + return 1 + fi + return 0 +} + +# Check for required tools +check_tools() { + local missing="" + + if ! command -v curl >/dev/null 2>&1; then + missing="curl" + fi + + if ! command -v jq >/dev/null 2>&1; then + missing="$missing jq" + fi + + if [ -n "$missing" ]; then + echo "Missing dependencies:$missing. Attempting to install..." + if ! install_dependencies "$missing"; then + echo "ERROR: Failed to install dependencies" >&2 + exit 1 + fi + + # Verify installation + if ! command -v curl >/dev/null 2>&1 || ! command -v jq >/dev/null 2>&1; then + echo "ERROR: Dependencies still not available after installation" >&2 + exit 1 + fi + fi +} + +# Main health check function +check_chain_head() { + local el_url="${1:-http://localhost:8545}" + local block_time="${2:-12}" + local state_file="${3:-/tmp/chain_head_state}" + + # Ensure dependencies are available + check_tools + + # Add wiggle room + block_time=$((block_time + 1)) + + # Get current block number + response=$(curl -s -m 2 -X POST "$el_url" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' 2>/dev/null) + + if [ $? -ne 0 ]; then + echo "ERROR: curl failed to connect to $el_url" + exit 1 + fi + + hex_block=$(echo "$response" | jq -r '.result' 2>/dev/null) + + if [ -z "$hex_block" ] || [ "$hex_block" = "null" ]; then + echo "ERROR: Failed to get block number from response" + exit 1 + fi + + current_block=$((hex_block)) + current_time=$(date +%s) + + # Read previous state + if [ -f "$state_file" ]; then + read -r prev_block prev_time < "$state_file" + else + # First run - just save state and succeed + echo "$current_block $current_time" > "$state_file" + echo "OK: Initial check, block $current_block" + exit 0 + fi + + # Check if block advanced + if [ "$current_block" -gt "$prev_block" ]; then + # Block advanced - update state and succeed + echo "$current_block $current_time" > "$state_file" + echo "OK: Chain head advanced to $current_block" + exit 0 + fi + + # Block hasn't advanced - check if we've exceeded timeout + elapsed=$((current_time - prev_time)) + + if [ "$elapsed" -ge "$block_time" ]; then + echo "ERROR: Chain head stuck at $current_block for ${elapsed}s (timeout: ${block_time}s)" + exit 1 + fi + + # Block hasn't advanced but still within timeout + echo "OK: Chain head at $current_block, waiting ${elapsed}/${block_time}s" + exit 0 +} + +# Run the check +check_chain_head "$@" diff --git a/playground/watchdog.go b/playground/watchdog.go deleted file mode 100644 index ab27c24..0000000 --- a/playground/watchdog.go +++ /dev/null @@ -1,42 +0,0 @@ -package playground - -import ( - "context" - "fmt" -) - -func RunWatchdog(out *output, services []*Service) error { - watchdogErr := make(chan error, len(services)) - - output, err := out.LogOutput("watchdog") - if err != nil { - return fmt.Errorf("failed to create log output: %w", err) - } - - for _, s := range services { - if watchdogFn := s.watchdogFn; watchdogFn != nil { - go func() { - if err := watchdogFn(output, s, context.Background()); err != nil { - watchdogErr <- fmt.Errorf("service %s watchdog failed: %w", s.Name, err) - } - }() - } - } - - // If any of the watchdogs fail, we return the error - if err := <-watchdogErr; err != nil { - return fmt.Errorf("failed to run watchdog: %w", err) - } - return nil -} - -func CompleteReady(ctx context.Context, services []*Service) error { - for _, s := range services { - if readyFn := s.readyFn; readyFn != nil { - if err := readyFn(ctx, s); err != nil { - return err - } - } - } - return nil -} From 1f18b0f228b378a014b669971aa93514f1481a7b Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 09:51:48 +0100 Subject: [PATCH 2/8] Add more fixes --- main.go | 16 ++++----- playground/local_runner.go | 44 +++++++++++++++++-------- playground/local_runner_test.go | 4 +-- playground/manifest.go | 2 +- playground/utils/el_watch.sh | 58 ++++++++++++++++++++++++--------- 5 files changed, 84 insertions(+), 40 deletions(-) diff --git a/main.go b/main.go index 007f1d0..469e738 100644 --- a/main.go +++ b/main.go @@ -305,14 +305,14 @@ func runIt(recipe playground.Recipe) error { if interactive { i := playground.NewInteractiveDisplay(svcManager) - cfg.Callback = i.HandleUpdate + cfg.AddCallback(i.HandleUpdate) } // Add callback to log service updates in debug mode if logLevel == playground.LevelDebug { - cfg.Callback = func(serviceName string, update playground.TaskStatus) { + cfg.AddCallback(func(serviceName string, update playground.TaskStatus) { log.Printf("[DEBUG] [%s] %s\n", serviceName, update) - } + }) } dockerRunner, err := playground.NewLocalRunner(cfg) @@ -369,13 +369,11 @@ func runIt(recipe playground.Recipe) error { watchdogErr := make(chan error, 1) if watchdog { go func() { - // TODO: Just wait for one of the services to fail health check and stop - panic("TODO") - /* - if err := playground.RunWatchdog(artifacts.Out, svcManager.Services); err != nil { - watchdogErr <- fmt.Errorf("watchdog failed: %w", err) + cfg.AddCallback(func(name string, status playground.TaskStatus) { + if status == playground.TaskStatusUnhealty { + watchdogErr <- fmt.Errorf("watchdog failed: %w", fmt.Errorf("task '%s' is not healthy anymore", name)) } - */ + }) }() } diff --git a/playground/local_runner.go b/playground/local_runner.go index aec4f6b..69c9ca3 100644 --- a/playground/local_runner.go +++ b/playground/local_runner.go @@ -73,12 +73,13 @@ type task struct { type TaskStatus string var ( - TaskStatusPulling TaskStatus = "pulling" - TaskStatusPulled TaskStatus = "pulled" - TaskStatusPending TaskStatus = "pending" - TaskStatusStarted TaskStatus = "started" - TaskStatusDie TaskStatus = "die" - TaskStatusHealthy TaskStatus = "healthy" + TaskStatusPulling TaskStatus = "pulling" + TaskStatusPulled TaskStatus = "pulled" + TaskStatusPending TaskStatus = "pending" + TaskStatusStarted TaskStatus = "started" + TaskStatusDie TaskStatus = "die" + TaskStatusHealthy TaskStatus = "healthy" + TaskStatusUnhealty TaskStatus = "unhealthy" ) func newDockerClient() (*client.Client, error) { @@ -97,9 +98,17 @@ type RunnerConfig struct { Labels map[string]string LogInternally bool Platform string - Callback func(serviceName string, update TaskStatus) + Callbacks []Callback } +func (r *RunnerConfig) AddCallback(c Callback) { + if r.Callbacks == nil { + r.Callbacks = append(r.Callbacks, c) + } +} + +type Callback func(serviceName string, update TaskStatus) + func NewLocalRunner(cfg *RunnerConfig) (*LocalRunner, error) { client, err := newDockerClient() if err != nil { @@ -144,8 +153,8 @@ func NewLocalRunner(cfg *RunnerConfig) (*LocalRunner, error) { cfg.NetworkName = defaultNetworkName } - if cfg.Callback == nil { - cfg.Callback = func(serviceName string, update TaskStatus) {} // noop + if cfg.Callbacks == nil { + cfg.Callbacks = []Callback{func(serviceName string, update TaskStatus) {}} // noop } d := &LocalRunner{ @@ -205,11 +214,19 @@ func (d *LocalRunner) WaitForReady(ctx context.Context, timeout time.Duration) e } } +func (d *LocalRunner) emitCallback(name string, status TaskStatus) { + for _, callback := range d.config.Callbacks { + callback(name, status) + } +} + func (d *LocalRunner) updateTaskStatus(name string, status TaskStatus) { d.tasksMtx.Lock() defer d.tasksMtx.Unlock() if status == TaskStatusHealthy { d.tasks[name].ready = true + } else if status == TaskStatusUnhealty { + d.tasks[name].ready = false } else { d.tasks[name].status = status } @@ -218,7 +235,8 @@ func (d *LocalRunner) updateTaskStatus(name string, status TaskStatus) { d.exitErr <- fmt.Errorf("container %s failed", name) } - d.config.Callback(name, status) + fmt.Println("=>", name, status) + d.emitCallback(name, status) } func (d *LocalRunner) ExitErr() <-chan error { @@ -784,7 +802,7 @@ func (d *LocalRunner) trackContainerStatusAndLogs() { log.Info("container is healthy", "name", name) case events.ActionHealthStatusUnhealthy: - // TODO + d.updateTaskStatus(name, TaskStatusUnhealty) } case err := <-errCh: @@ -867,7 +885,7 @@ func (d *LocalRunner) ensureImage(ctx context.Context, imageName string) error { } // Image not found locally, pull it - d.config.Callback(imageName, TaskStatusPulling) + d.emitCallback(imageName, TaskStatusPulling) slog.Info("pulling image", "image", imageName) reader, err := d.client.ImagePull(ctx, imageName, image.PullOptions{}) @@ -882,7 +900,7 @@ func (d *LocalRunner) ensureImage(ctx context.Context, imageName string) error { return fmt.Errorf("failed to read image pull output %s: %w", imageName, err) } - d.config.Callback(imageName, TaskStatusPulled) + d.emitCallback(imageName, TaskStatusPulled) return nil } diff --git a/playground/local_runner_test.go b/playground/local_runner_test.go index 8cc5448..04b4b51 100644 --- a/playground/local_runner_test.go +++ b/playground/local_runner_test.go @@ -37,8 +37,8 @@ func TestRunnerPullImages(t *testing.T) { } cfg := &RunnerConfig{ - Manifest: manifest, - Callback: callback, + Manifest: manifest, + Callbacks: []Callback{callback}, } runner, err := NewLocalRunner(cfg) require.NoError(t, err) diff --git a/playground/manifest.go b/playground/manifest.go index 0602544..54eeaaa 100644 --- a/playground/manifest.go +++ b/playground/manifest.go @@ -472,7 +472,7 @@ func (s *Service) WithReady(check ReadyCheck) *Service { } func ElWatch(endpoint string, blockTime time.Duration) []string { - return []string{"CMD-SHELL", fmt.Sprintf("chmod +x /artifacts/scripts/el_watch.sh && /artifacts/scripts/el_watch.sh %s %d", endpoint, blockTime.Seconds())} + return []string{"CMD-SHELL", fmt.Sprintf("chmod +x /artifacts/scripts/el_watch.sh && /artifacts/scripts/el_watch.sh %s %d", endpoint, int(blockTime.Seconds()))} } type ReadyCheck struct { diff --git a/playground/utils/el_watch.sh b/playground/utils/el_watch.sh index b2005b9..77e143f 100755 --- a/playground/utils/el_watch.sh +++ b/playground/utils/el_watch.sh @@ -24,14 +24,12 @@ install_dependencies() { check_tools() { local missing="" - if ! command -v curl >/dev/null 2>&1; then + # Check for curl or wget + if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then + # Prefer curl as it's more common missing="curl" fi - if ! command -v jq >/dev/null 2>&1; then - missing="$missing jq" - fi - if [ -n "$missing" ]; then echo "Missing dependencies:$missing. Attempting to install..." if ! install_dependencies "$missing"; then @@ -40,13 +38,43 @@ check_tools() { fi # Verify installation - if ! command -v curl >/dev/null 2>&1 || ! command -v jq >/dev/null 2>&1; then + if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then echo "ERROR: Dependencies still not available after installation" >&2 exit 1 fi fi } +# Extract JSON value without jq +# Usage: extract_json_value '{"result":"0x123"}' "result" +extract_json_value() { + local json="$1" + local key="$2" + + # Use sed to extract the value for the given key + # Matches: "key":"value" or "key":value + echo "$json" | sed -n 's/.*"'"$key"'"\s*:\s*"\?\([^,"}\]*\)"\?.*/\1/p' | head -1 +} + +# Make HTTP request (supports both curl and wget) +http_post() { + local url="$1" + local data="$2" + + if command -v curl >/dev/null 2>&1; then + curl -s -m 2 -X POST "$url" \ + -H "Content-Type: application/json" \ + -d "$data" 2>/dev/null + elif command -v wget >/dev/null 2>&1; then + wget -q -O - --timeout=2 --post-data="$data" \ + --header="Content-Type: application/json" \ + "$url" 2>/dev/null + else + echo "ERROR: Neither curl nor wget available" >&2 + return 1 + fi +} + # Main health check function check_chain_head() { local el_url="${1:-http://localhost:8545}" @@ -57,26 +85,26 @@ check_chain_head() { check_tools # Add wiggle room - block_time=$((block_time + 1)) + block_time=$(expr "$block_time" + 1) # Get current block number - response=$(curl -s -m 2 -X POST "$el_url" \ - -H "Content-Type: application/json" \ - -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' 2>/dev/null) + response=$(http_post "$el_url" '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}') - if [ $? -ne 0 ]; then - echo "ERROR: curl failed to connect to $el_url" + if [ $? -ne 0 ] || [ -z "$response" ]; then + echo "ERROR: Failed to connect to $el_url" exit 1 fi - hex_block=$(echo "$response" | jq -r '.result' 2>/dev/null) + # Extract the hex block number from JSON response + hex_block=$(extract_json_value "$response" "result") if [ -z "$hex_block" ] || [ "$hex_block" = "null" ]; then echo "ERROR: Failed to get block number from response" exit 1 fi - current_block=$((hex_block)) + # Convert hex to decimal + current_block=$(printf "%d" "$hex_block") current_time=$(date +%s) # Read previous state @@ -98,7 +126,7 @@ check_chain_head() { fi # Block hasn't advanced - check if we've exceeded timeout - elapsed=$((current_time - prev_time)) + elapsed=$(expr "$current_time" - "$prev_time") if [ "$elapsed" -ge "$block_time" ]; then echo "ERROR: Chain head stuck at $current_block for ${elapsed}s (timeout: ${block_time}s)" From e742adf690c10feab7ddbeaf5c5075f969b113e1 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 11:11:16 +0100 Subject: [PATCH 3/8] Try to use healthmon --- playground/components.go | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/playground/components.go b/playground/components.go index 33c3fcf..2b87ee4 100644 --- a/playground/components.go +++ b/playground/components.go @@ -480,14 +480,9 @@ func (r *RethEL) Apply(manifest *Manifest) { WithRelease(rethELRelease). WithArtifact("/data/genesis.json", "genesis.json"). WithArtifact("/data/jwtsecret", "jwtsecret"). - WithVolume("data", "/data_reth"). - WithReady(ReadyCheck{ - Test: ElWatch("http://localhost:8545", 12*time.Second), - Interval: 1 * time.Second, - Timeout: 10 * time.Second, - Retries: 20, - StartPeriod: 1 * time.Second, - }) + WithVolume("data", "/data_reth") + + UseHealthmon(manifest, svc) if r.UseNativeReth { // we need to use this otherwise the db cannot be binded @@ -918,3 +913,19 @@ func (b *BuilderHub) Apply(manifest *Manifest) { WithEnv("TARGET", Connect("web", "http")). DependsOnHealthy("web") } + +func UseHealthmon(m *Manifest, s *Service) { + m.NewService("test-h"). + WithImage("ghcr.io/alexallah/ethereum-healthmon"). + WithTag("v1.3.1"). + WithArgs("--chain", "execution", "--addr", "el"). + WithLabel("sidecar", "true"). + WithLabel("parent", s.Name). + WithReady(ReadyCheck{ + Test: []string{"CMD", "wget", "--spider", "--quiet", "http://127.0.0.1:21171/ready"}, + Interval: 1 * time.Second, + Timeout: 10 * time.Second, + Retries: 20, + StartPeriod: 1 * time.Second, + }) +} From 143c0df4765396353cfee09109f772b7c818e0e3 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 12:32:09 +0100 Subject: [PATCH 4/8] Use healhtmon docker --- main.go | 2 + playground/components.go | 36 +---- playground/components_test.go | 28 ++++ playground/local_runner.go | 1 - playground/watchers.go | 293 ---------------------------------- 5 files changed, 34 insertions(+), 326 deletions(-) delete mode 100644 playground/watchers.go diff --git a/main.go b/main.go index 469e738..2f6d54a 100644 --- a/main.go +++ b/main.go @@ -353,6 +353,8 @@ func runIt(recipe playground.Recipe) error { return fmt.Errorf("failed to wait for service readiness: %w", err) } + fmt.Println("\nServices healthy... Ready to accept transactions") + // get the output from the recipe output := recipe.Output(svcManager) if len(output) > 0 { diff --git a/playground/components.go b/playground/components.go index 2b87ee4..5bd68f8 100644 --- a/playground/components.go +++ b/playground/components.go @@ -1,9 +1,7 @@ package playground import ( - "context" "fmt" - "io" "strconv" "strings" "time" @@ -395,16 +393,6 @@ func (o *OpGeth) Apply(manifest *Manifest) { }) } -func opGethReadyFn(ctx context.Context, service *Service) error { - opGethURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - return waitForFirstBlock(ctx, opGethURL, 60*time.Second) -} - -func opGethWatchdogFn(out io.Writer, service *Service, ctx context.Context) error { - gethURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - return watchChainHead(out, gethURL, 2*time.Second) -} - type RethEL struct { UseRethForValidation bool UseNativeReth bool @@ -611,20 +599,6 @@ func (m *MevBoostRelay) Apply(manifest *Manifest) { } } -func mevboostRelayWatchdogFn(out io.Writer, service *Service, ctx context.Context) error { - beaconNodeURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort) - - watchGroup := newWatchGroup() - watchGroup.watch(func() error { - return watchProposerPayloads(beaconNodeURL) - }) - watchGroup.watch(func() error { - return validateProposerPayloads(out, beaconNodeURL) - }) - - return watchGroup.wait() -} - type OpReth struct{} var opRethRelease = &release{ @@ -915,12 +889,10 @@ func (b *BuilderHub) Apply(manifest *Manifest) { } func UseHealthmon(m *Manifest, s *Service) { - m.NewService("test-h"). - WithImage("ghcr.io/alexallah/ethereum-healthmon"). - WithTag("v1.3.1"). - WithArgs("--chain", "execution", "--addr", "el"). - WithLabel("sidecar", "true"). - WithLabel("parent", s.Name). + m.NewService(s.Name+"_healthmon"). + WithImage("ghcr.io/flashbots/ethereum-healthmon"). + WithTag("v0.0.1"). + WithArgs("--chain", "execution", "--url", Connect(s.Name, "http")). WithReady(ReadyCheck{ Test: []string{"CMD", "wget", "--spider", "--quiet", "http://127.0.0.1:21171/ready"}, Interval: 1 * time.Second, diff --git a/playground/components_test.go b/playground/components_test.go index f44b2e7..4a2aa67 100644 --- a/playground/components_test.go +++ b/playground/components_test.go @@ -12,6 +12,8 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/ethclient" + "github.com/ethereum/go-ethereum/rpc" "github.com/stretchr/testify/require" ) @@ -162,3 +164,29 @@ func toSnakeCase(s string) string { // Convert to lowercase return strings.ToLower(snake) } + +func waitForBlock(elURL string, targetBlock uint64, timeout time.Duration) error { + rpcClient, err := rpc.Dial(elURL) + if err != nil { + return fmt.Errorf("failed to connect to %s: %w", elURL, err) + } + defer rpcClient.Close() + + clt := ethclient.NewClient(rpcClient) + timeoutCh := time.After(timeout) + + for { + select { + case <-timeoutCh: + return fmt.Errorf("timeout waiting for block %d on %s", targetBlock, elURL) + case <-time.After(500 * time.Millisecond): + num, err := clt.BlockNumber(context.Background()) + if err != nil { + continue + } + if num >= targetBlock { + return nil + } + } + } +} diff --git a/playground/local_runner.go b/playground/local_runner.go index 69c9ca3..d0b7976 100644 --- a/playground/local_runner.go +++ b/playground/local_runner.go @@ -235,7 +235,6 @@ func (d *LocalRunner) updateTaskStatus(name string, status TaskStatus) { d.exitErr <- fmt.Errorf("container %s failed", name) } - fmt.Println("=>", name, status) d.emitCallback(name, status) } diff --git a/playground/watchers.go b/playground/watchers.go deleted file mode 100644 index 4e2aece..0000000 --- a/playground/watchers.go +++ /dev/null @@ -1,293 +0,0 @@ -package playground - -import ( - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "time" - - "github.com/ethereum/go-ethereum/ethclient" - "github.com/ethereum/go-ethereum/rpc" - "github.com/flashbots/mev-boost-relay/beaconclient" - mevRCommon "github.com/flashbots/mev-boost-relay/common" -) - -func waitForFirstBlock(ctx context.Context, elURL string, timeout time.Duration) error { - rpcClient, err := rpc.Dial(elURL) - if err != nil { - fmt.Printf(" [%s] Failed to connect: %v\n", elURL, err) - return err - } - defer rpcClient.Close() - - clt := ethclient.NewClient(rpcClient) - fmt.Printf(" [%s] Connected, waiting for first block...\n", elURL) - - timeoutCh := time.After(timeout) - checkCount := 0 - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-timeoutCh: - return fmt.Errorf("timeout waiting for first block on %s", elURL) - case <-time.After(500 * time.Millisecond): - num, err := clt.BlockNumber(ctx) - checkCount++ - if err != nil { - if checkCount%10 == 0 { - fmt.Printf(" [%s] Error getting block number: %v\n", elURL, err) - } - continue - } - if num > 0 { - fmt.Printf(" [%s] First block detected: %d\n", elURL, num) - return nil - } - if checkCount%10 == 0 { - fmt.Printf(" [%s] Block number: %d (waiting for > 0)\n", elURL, num) - } - } - } -} - -func waitForChainAlive(ctx context.Context, logOutput io.Writer, beaconNodeURL string, timeout time.Duration) error { - // Test that blocks are being produced - log := mevRCommon.LogSetup(false, "info").WithField("context", "waitForChainAlive") - log.Logger.Out = logOutput - - clt := beaconclient.NewProdBeaconInstance(log, beaconNodeURL, beaconNodeURL) - - // Subscribe to head events right away even if the connection has not been established yet - // That is handled internally in the function already. - // Otherwise, if we connect only when the first head slot happens we might miss some initial slots. - ch := make(chan beaconclient.PayloadAttributesEvent) - go clt.SubscribeToPayloadAttributesEvents(ch) - - { - // If the chain has not started yet, wait for it to start. - // Otherwise, the subscription will not return any data. - bClient := beaconclient.NewMultiBeaconClient(log, []beaconclient.IBeaconInstance{ - clt, - }) - - isReady := func() bool { - sync, err := bClient.BestSyncStatus() - if err != nil { - return false - } - return sync.HeadSlot >= 1 - } - - if !isReady() { - syncTimeoutCh := time.After(timeout) - for { - if isReady() { - break - } - select { - case <-syncTimeoutCh: - return fmt.Errorf("beacon client failed to start") - case <-ctx.Done(): - return fmt.Errorf("timeout waiting for chain to start") - default: - time.Sleep(1 * time.Second) - } - } - } - } - - return nil -} - -// validateProposerPayloads validates that payload attribute events are being broadcasted by the beacon node -// in the correct order without any missing slots. -func validateProposerPayloads(logOutput io.Writer, beaconNodeURL string) error { - // Test that blocks are being produced - log := mevRCommon.LogSetup(false, "info").WithField("context", "validateProposerPayloads") - log.Logger.Out = logOutput - - clt := beaconclient.NewProdBeaconInstance(log, beaconNodeURL, beaconNodeURL) - - // We run this after 'waitForChainAlive' to ensure that the beacon node is ready to receive payloads. - ch := make(chan beaconclient.PayloadAttributesEvent) - go clt.SubscribeToPayloadAttributesEvents(ch) - - log.Infof("Chain is alive. Subscribing to head events") - - var lastSlot uint64 - for { - select { - case head := <-ch: - log.Infof("Slot: %d Parent block number: %d", head.Data.ProposalSlot, head.Data.ParentBlockNumber) - - // If we are being notified of a new slot, validate that the slots are contiguous - // Note that lighthouse might send multiple updates for the same slot. - if lastSlot != 0 && lastSlot != head.Data.ProposalSlot && lastSlot+1 != head.Data.ProposalSlot { - return fmt.Errorf("slot mismatch, expected %d, got %d", lastSlot+1, head.Data.ProposalSlot) - } - // if the network did not miss any initial slots, lighthouse will send payload attribute updates - // of the form: (slot = slot, parent block number = slot - 2), (slot, slot - 1). - // The -2 is in case we want to handle reorgs in the chain. - // We need to validate that at least the difference between the parent block number and the slot is 2. - if head.Data.ProposalSlot-head.Data.ParentBlockNumber > 2 { - return fmt.Errorf("parent block too big %d", head.Data.ParentBlockNumber) - } - - lastSlot = head.Data.ProposalSlot - case <-time.After(20 * time.Second): - return fmt.Errorf("timeout waiting for block") - } - } -} - -func watchProposerPayloads(beaconNodeURL string) error { - getProposerPayloadDelivered := func() ([]*mevRCommon.BidTraceV2JSON, error) { - resp, err := http.Get(fmt.Sprintf("%s/relay/v1/data/bidtraces/proposer_payload_delivered", beaconNodeURL)) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - data, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - - var payloadDeliveredList []*mevRCommon.BidTraceV2JSON - if err := json.Unmarshal(data, &payloadDeliveredList); err != nil { - return nil, err - } - return payloadDeliveredList, nil - } - - // Wait for at least 10 seconds for Mev-boost to start - timerC := time.After(10 * time.Second) -LOOP: - for { - select { - case <-timerC: - break - case <-time.After(2 * time.Second): - if _, err := getProposerPayloadDelivered(); err == nil { - break LOOP - } - } - } - - // This is not the most efficient solution since we are querying the endpoint for the full list of payloads - // every 2 seconds. It should be fine for the kind of workloads expected to run. - - lastSlot := uint64(0) - - for { - time.Sleep(2 * time.Second) - - vals, err := getProposerPayloadDelivered() - if err != nil { - fmt.Println("Error getting proposer payloads:", err) - continue - } - - for _, val := range vals { - if val.Slot <= lastSlot { - continue - } - - fmt.Printf("Block Proposed: Slot: %d, Builder: %s, Block: %d\n", val.Slot, val.BuilderPubkey, val.BlockNumber) - lastSlot = val.Slot - } - } -} - -func waitForBlock(elURL string, targetBlock uint64, timeout time.Duration) error { - rpcClient, err := rpc.Dial(elURL) - if err != nil { - return fmt.Errorf("failed to connect to %s: %w", elURL, err) - } - defer rpcClient.Close() - - clt := ethclient.NewClient(rpcClient) - timeoutCh := time.After(timeout) - - for { - select { - case <-timeoutCh: - return fmt.Errorf("timeout waiting for block %d on %s", targetBlock, elURL) - case <-time.After(500 * time.Millisecond): - num, err := clt.BlockNumber(context.Background()) - if err != nil { - continue - } - if num >= targetBlock { - return nil - } - } - } -} - -// watchChainHead watches the chain head and ensures that it is advancing -func watchChainHead(logOutput io.Writer, elURL string, blockTime time.Duration) error { - log := mevRCommon.LogSetup(false, "info").WithField("context", "watchChainHead").WithField("el", elURL) - log.Logger.Out = logOutput - - // add some wiggle room to block time - blockTime = blockTime + 1*time.Second - - rpcClient, err := rpc.Dial(elURL) - if err != nil { - return err - } - - var latestBlock *uint64 - clt := ethclient.NewClient(rpcClient) - - timeout := time.NewTimer(blockTime) - defer timeout.Stop() - - for { - select { - case <-time.After(500 * time.Millisecond): - num, err := clt.BlockNumber(context.Background()) - if err != nil { - return err - } - if latestBlock != nil && num <= *latestBlock { - continue - } - log.Infof("Chain head: %d", num) - latestBlock = &num - - // Reset timeout since we saw a new block - if !timeout.Stop() { - <-timeout.C - } - timeout.Reset(blockTime) - - case <-timeout.C: - return fmt.Errorf("chain head for %s not advancing", elURL) - } - } -} - -type watchGroup struct { - errCh chan error -} - -func newWatchGroup() *watchGroup { - return &watchGroup{ - errCh: make(chan error, 1), - } -} - -func (wg *watchGroup) watch(watch func() error) { - go func() { - wg.errCh <- watch() - }() -} - -func (wg *watchGroup) wait() error { - return <-wg.errCh -} From d156492619140de2c6e70f3cd3e8fccef52b3c03 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 12:36:28 +0100 Subject: [PATCH 5/8] Remove old file --- playground/artifacts.go | 4 - playground/utils/el_watch.sh | 142 ----------------------------------- 2 files changed, 146 deletions(-) delete mode 100755 playground/utils/el_watch.sh diff --git a/playground/artifacts.go b/playground/artifacts.go index e707aa9..98f1358 100644 --- a/playground/artifacts.go +++ b/playground/artifacts.go @@ -61,9 +61,6 @@ var clConfigContent []byte //go:embed utils/query.sh var queryReadyCheck []byte -//go:embed utils/el_watch.sh -var elWatchScript []byte - type ArtifactsBuilder struct { outputDir string applyLatestL1Fork bool @@ -256,7 +253,6 @@ func (b *ArtifactsBuilder) Build() (*Artifacts, error) { "testnet/genesis_validators_root.txt": hex.EncodeToString(state.GenesisValidatorsRoot()), "data_validator/": &lighthouseKeystore{privKeys: priv}, "scripts/query.sh": queryReadyCheck, - "scripts/el_watch.sh": elWatchScript, }) if err != nil { return nil, err diff --git a/playground/utils/el_watch.sh b/playground/utils/el_watch.sh deleted file mode 100755 index 77e143f..0000000 --- a/playground/utils/el_watch.sh +++ /dev/null @@ -1,142 +0,0 @@ -#!/bin/sh - -# Function to install dependencies -install_dependencies() { - local missing_deps="$1" - - if [ -f "/etc/alpine-release" ]; then - echo "Installing $missing_deps on Alpine..." - apk add --no-cache $missing_deps >/dev/null 2>&1 || return 1 - elif [ -f "/etc/debian_version" ]; then - echo "Installing $missing_deps on Debian/Ubuntu..." - apt-get update >/dev/null 2>&1 && apt-get install -y $missing_deps >/dev/null 2>&1 || return 1 - elif [ -f "/etc/redhat-release" ]; then - echo "Installing $missing_deps on RHEL/CentOS..." - yum install -y $missing_deps >/dev/null 2>&1 || return 1 - else - echo "ERROR: No package manager found, cannot install $missing_deps" >&2 - return 1 - fi - return 0 -} - -# Check for required tools -check_tools() { - local missing="" - - # Check for curl or wget - if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then - # Prefer curl as it's more common - missing="curl" - fi - - if [ -n "$missing" ]; then - echo "Missing dependencies:$missing. Attempting to install..." - if ! install_dependencies "$missing"; then - echo "ERROR: Failed to install dependencies" >&2 - exit 1 - fi - - # Verify installation - if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then - echo "ERROR: Dependencies still not available after installation" >&2 - exit 1 - fi - fi -} - -# Extract JSON value without jq -# Usage: extract_json_value '{"result":"0x123"}' "result" -extract_json_value() { - local json="$1" - local key="$2" - - # Use sed to extract the value for the given key - # Matches: "key":"value" or "key":value - echo "$json" | sed -n 's/.*"'"$key"'"\s*:\s*"\?\([^,"}\]*\)"\?.*/\1/p' | head -1 -} - -# Make HTTP request (supports both curl and wget) -http_post() { - local url="$1" - local data="$2" - - if command -v curl >/dev/null 2>&1; then - curl -s -m 2 -X POST "$url" \ - -H "Content-Type: application/json" \ - -d "$data" 2>/dev/null - elif command -v wget >/dev/null 2>&1; then - wget -q -O - --timeout=2 --post-data="$data" \ - --header="Content-Type: application/json" \ - "$url" 2>/dev/null - else - echo "ERROR: Neither curl nor wget available" >&2 - return 1 - fi -} - -# Main health check function -check_chain_head() { - local el_url="${1:-http://localhost:8545}" - local block_time="${2:-12}" - local state_file="${3:-/tmp/chain_head_state}" - - # Ensure dependencies are available - check_tools - - # Add wiggle room - block_time=$(expr "$block_time" + 1) - - # Get current block number - response=$(http_post "$el_url" '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}') - - if [ $? -ne 0 ] || [ -z "$response" ]; then - echo "ERROR: Failed to connect to $el_url" - exit 1 - fi - - # Extract the hex block number from JSON response - hex_block=$(extract_json_value "$response" "result") - - if [ -z "$hex_block" ] || [ "$hex_block" = "null" ]; then - echo "ERROR: Failed to get block number from response" - exit 1 - fi - - # Convert hex to decimal - current_block=$(printf "%d" "$hex_block") - current_time=$(date +%s) - - # Read previous state - if [ -f "$state_file" ]; then - read -r prev_block prev_time < "$state_file" - else - # First run - just save state and succeed - echo "$current_block $current_time" > "$state_file" - echo "OK: Initial check, block $current_block" - exit 0 - fi - - # Check if block advanced - if [ "$current_block" -gt "$prev_block" ]; then - # Block advanced - update state and succeed - echo "$current_block $current_time" > "$state_file" - echo "OK: Chain head advanced to $current_block" - exit 0 - fi - - # Block hasn't advanced - check if we've exceeded timeout - elapsed=$(expr "$current_time" - "$prev_time") - - if [ "$elapsed" -ge "$block_time" ]; then - echo "ERROR: Chain head stuck at $current_block for ${elapsed}s (timeout: ${block_time}s)" - exit 1 - fi - - # Block hasn't advanced but still within timeout - echo "OK: Chain head at $current_block, waiting ${elapsed}/${block_time}s" - exit 0 -} - -# Run the check -check_chain_head "$@" From d740405ca02ff9e72efadbece3b160c0e18d8eb7 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 12:38:05 +0100 Subject: [PATCH 6/8] Remove more old stuff --- playground/components.go | 27 +++++++++------------------ playground/manifest.go | 4 ---- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/playground/components.go b/playground/components.go index 5bd68f8..a7b40cf 100644 --- a/playground/components.go +++ b/playground/components.go @@ -341,7 +341,7 @@ func (o *OpGeth) Apply(manifest *Manifest) { trustedPeers = fmt.Sprintf("--bootnodes %s ", manifest.ctx.Bootnode.Connect()) } - manifest.NewService("op-geth"). + svc := manifest.NewService("op-geth"). WithImage("us-docker.pkg.dev/oplabs-tools-artifacts/images/op-geth"). WithTag("v1.101503.2-rc.5"). WithEntrypoint("/bin/sh"). @@ -383,14 +383,9 @@ func (o *OpGeth) Apply(manifest *Manifest) { WithVolume("data", "/data_opgeth"). WithArtifact("/data/l2-genesis.json", "l2-genesis.json"). WithArtifact("/data/jwtsecret", "jwtsecret"). - WithArtifact("/data/p2p_key.txt", o.Enode.Artifact). - WithReady(ReadyCheck{ - Test: ElWatch("http://localhost:8545", 2*time.Second), - Interval: 1 * time.Second, - Timeout: 10 * time.Second, - Retries: 20, - StartPeriod: 1 * time.Second, - }) + WithArtifact("/data/p2p_key.txt", o.Enode.Artifact) + + UseHealthmon(manifest, svc) } type RethEL struct { @@ -619,7 +614,7 @@ var opRethRelease = &release{ } func (o *OpReth) Apply(manifest *Manifest) { - manifest.NewService("op-reth"). + svc := manifest.NewService("op-reth"). WithImage("ghcr.io/paradigmxyz/op-reth"). WithTag("nightly"). WithEntrypoint("op-reth"). @@ -641,14 +636,9 @@ func (o *OpReth) Apply(manifest *Manifest) { WithRelease(opRethRelease). WithArtifact("/data/jwtsecret", "jwtsecret"). WithArtifact("/data/l2-genesis.json", "l2-genesis.json"). - WithVolume("data", "/data_op_reth"). - WithReady(ReadyCheck{ - Test: ElWatch("http://localhost:8545", 2*time.Second), - Interval: 1 * time.Second, - Timeout: 10 * time.Second, - Retries: 20, - StartPeriod: 1 * time.Second, - }) + WithVolume("data", "/data_op_reth") + + UseHealthmon(manifest, svc) } type MevBoost struct { @@ -892,6 +882,7 @@ func UseHealthmon(m *Manifest, s *Service) { m.NewService(s.Name+"_healthmon"). WithImage("ghcr.io/flashbots/ethereum-healthmon"). WithTag("v0.0.1"). + // TODO: Use this also for beacon node WithArgs("--chain", "execution", "--url", Connect(s.Name, "http")). WithReady(ReadyCheck{ Test: []string{"CMD", "wget", "--spider", "--quiet", "http://127.0.0.1:21171/ready"}, diff --git a/playground/manifest.go b/playground/manifest.go index 54eeaaa..47721a2 100644 --- a/playground/manifest.go +++ b/playground/manifest.go @@ -471,10 +471,6 @@ func (s *Service) WithReady(check ReadyCheck) *Service { return s } -func ElWatch(endpoint string, blockTime time.Duration) []string { - return []string{"CMD-SHELL", fmt.Sprintf("chmod +x /artifacts/scripts/el_watch.sh && /artifacts/scripts/el_watch.sh %s %d", endpoint, int(blockTime.Seconds()))} -} - type ReadyCheck struct { QueryURL string `json:"query_url"` Test []string `json:"test"` From 3d8d8f61cf3f47cfe800e67a77f10cdfb8f2f7bf Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 12:39:15 +0100 Subject: [PATCH 7/8] Remove --- playground/local_runner.go | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/playground/local_runner.go b/playground/local_runner.go index d0b7976..001c2ba 100644 --- a/playground/local_runner.go +++ b/playground/local_runner.go @@ -974,43 +974,6 @@ func (d *LocalRunner) Run(ctx context.Context) error { return g.Wait() } -// StopContainersBySessionID removes all Docker containers associated with a specific playground session ID. -// This is a standalone utility function used by the clean command to stop containers without requiring -// a LocalRunner instance or manifest reference. -// -// TODO: Refactor to reduce code duplication with LocalRunner.Stop() -// Consider creating a shared dockerClient wrapper with helper methods for container management -// that both LocalRunner and this function can use. -func StopContainersBySessionID(id string) error { - client, err := newDockerClient() - if err != nil { - return err - } - - containers, err := client.ContainerList(context.Background(), container.ListOptions{ - Filters: filters.NewArgs(filters.Arg("label", fmt.Sprintf("playground.session=%s", id))), - }) - if err != nil { - return fmt.Errorf("error getting container list: %w", err) - } - - g := new(errgroup.Group) - for _, cont := range containers { - g.Go(func() error { - if err := client.ContainerRemove(context.Background(), cont.ID, container.RemoveOptions{ - RemoveVolumes: true, - RemoveLinks: false, - Force: true, - }); err != nil { - return fmt.Errorf("error removing container: %w", err) - } - return nil - }) - } - - return g.Wait() -} - type HealthCheckResponse struct { Output string ExitCode int From 2fa8b1b989d2fbf5d63236a6fbdb5f8f10bad7c1 Mon Sep 17 00:00:00 2001 From: Ferran Borreguero Date: Wed, 17 Dec 2025 12:44:14 +0100 Subject: [PATCH 8/8] Remove unnecesary depends --- playground/components.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/playground/components.go b/playground/components.go index a7b40cf..99ca966 100644 --- a/playground/components.go +++ b/playground/components.go @@ -26,7 +26,6 @@ func (r *RollupBoost) Apply(manifest *Manifest) { service := manifest.NewService("rollup-boost"). WithImage("docker.io/flashbots/rollup-boost"). WithTag("v0.7.5"). - DependsOnHealthy(r.ELNode). WithArgs( "--rpc-host", "0.0.0.0", "--rpc-port", `{{Port "authrpc" 8551}}`, @@ -230,8 +229,6 @@ func (c *ChainMonitor) Apply(manifest *Manifest) { WithPort("metrics", 8080). WithImage("ghcr.io/flashbots/chain-monitor"). WithTag("v0.0.54"). - DependsOnHealthy(c.L1RPC). - DependsOnHealthy(c.L2RPC). WithArgs( "serve", "--l1-rpc", Connect(c.L1RPC, "http"),