diff --git a/src/app/firedancer-dev/commands/backtest.c b/src/app/firedancer-dev/commands/backtest.c index f3fec8adc27..d1c43329efe 100644 --- a/src/app/firedancer-dev/commands/backtest.c +++ b/src/app/firedancer-dev/commands/backtest.c @@ -48,6 +48,7 @@ backtest_topo( config_t * config ) { ulong exec_tile_cnt = config->firedancer.layout.exec_tile_count; ulong lta_tile_cnt = config->firedancer.layout.snapla_tile_count; + ulong snapwr_tile_cnt = config->firedancer.layout.snapwr_tile_count; int disable_snap_loader = !config->gossip.entrypoints_cnt; int snap_vinyl = !!config->firedancer.vinyl.enabled; @@ -115,7 +116,6 @@ backtest_topo( config_t * config ) { /**********************************************************************/ int vinyl_enabled = config->firedancer.vinyl.enabled; fd_topo_tile_t * snapin_tile = NULL; - fd_topo_tile_t * snapwr_tile = NULL; if( FD_UNLIKELY( !disable_snap_loader ) ) { fd_topob_wksp( topo, "snapct" ); fd_topob_wksp( topo, "snapld" ); @@ -141,13 +141,16 @@ backtest_topo( config_t * config ) { snapin_tile->allow_shutdown = 1; if( vinyl_enabled ) { + fd_topob_wksp( topo, "snapwm" ); + fd_topo_tile_t * snapwm_tile = fd_topob_tile( topo, "snapwm", "snapwm", "metric_in", cpu_idx++, 0, 0 ); + snapwm_tile->allow_shutdown = 1; + fd_topob_wksp( topo, "snapwh" ); fd_topo_tile_t * snapwh_tile = fd_topob_tile( topo, "snapwh", "snapwh", "metric_in", cpu_idx++, 0, 0 ); snapwh_tile->allow_shutdown = 1; fd_topob_wksp( topo, "snapwr" ); - snapwr_tile = fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", cpu_idx++, 0, 0 ); - snapwr_tile->allow_shutdown = 1; + FOR(snapwr_tile_cnt) fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", cpu_idx++, 0, 0 )->allow_shutdown = 1; } } else { fd_topob_wksp( topo, "genesi" ); @@ -175,50 +178,73 @@ backtest_topo( config_t * config ) { fd_topob_wksp( topo, "snapct_ld" ); fd_topob_wksp( topo, "snapld_dc" ); fd_topob_wksp( topo, "snapdc_in" ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, 
"snapin_ct" ); - } else { - fd_topob_wksp( topo, "snapls_ct" ); - } fd_topob_wksp( topo, "snapin_manif" ); fd_topob_wksp( topo, "snapct_repr" ); - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, "snapla_ls" ); - fd_topob_wksp( topo, "snapin_ls" ); - } - if( vinyl_enabled ) { - fd_topob_wksp( topo, "snapin_wr" ); + fd_topob_wksp( topo, "snapin_txn"); + fd_topob_wksp( topo, "snapin_wm" ); + fd_topob_wksp( topo, "snapwm_wr" ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapwm_ct" ); + } else { + /* TODO pending */ + } + } else { + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapin_ct" ); + } else { + fd_topob_wksp( topo, "snapla_ls" ); + fd_topob_wksp( topo, "snapin_ls" ); + fd_topob_wksp( topo, "snapls_ct" ); + } } fd_topob_link( topo, "snapct_ld", "snapct_ld", 128UL, sizeof(fd_ssctrl_init_t), 1UL ); fd_topob_link( topo, "snapld_dc", "snapld_dc", 16384UL, USHORT_MAX, 1UL ); fd_topob_link( topo, "snapdc_in", "snapdc_in", 16384UL, USHORT_MAX, 1UL ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_link( topo, "snapin_ct", "snapin_ct", 128UL, 0UL, 1UL ); - } + fd_topob_link( topo, "snapin_manif", "snapin_manif", 4UL, sizeof(fd_snapshot_manifest_t), 1UL ); /* TODO: Should be depth 1 or 2 but replay backpressures */ fd_topob_link( topo, "snapct_repr", "snapct_repr", 128UL, 0UL, 1UL )->permit_no_consumers = 1; - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); - /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); - /**/ fd_topob_link( topo, "snapls_ct", "snapls_ct", 128UL, 0UL, 1UL ); - } - if( vinyl_enabled ) { - fd_topo_link_t * snapin_wh = fd_topob_link( topo, "snapin_wh", "snapin_wr", 4UL, 16UL<<20, 1UL ); - fd_topob_link( topo, "snapwh_wr", "snapin_wr", 4UL, 0UL, 1UL ); - fd_pod_insertf_ulong( topo->props, 8UL, 
"obj.%lu.app_sz", snapin_wh->dcache_obj_id ); + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + fd_topob_link( topo, "snapwm_ct", "snapwm_ct", 128UL, 0UL, 1UL ); + } else { + /* TODO pending */ + } + fd_topob_link( topo, "snapin_txn", "snapin_txn", 4UL, (ulong)((3764697600UL+64UL)/4), 1UL ); /* mtu=(sizeof(fd_sstxncache_entry_t)*(FD_SNAPIN_TXNCACHE_MAX_ENTRIES+1UL))/depth */ + fd_topob_link( topo, "snapin_wm", "snapin_wm", 16UL, 128UL<<20, 1UL ); /* FD_SSPARSE_ACC_BATCH_MAX * 16<<20 */ + fd_topo_link_t * snapwm_wh = + fd_topob_link( topo, "snapwm_wh", "snapwm_wr", 16UL, 16UL<<20, 1UL ); + fd_topob_link( topo, "snapwh_wr", "snapwm_wr", 16UL, 0UL, 1UL ); + fd_pod_insertf_ulong( topo->props, 8UL, "obj.%lu.app_sz", snapwm_wh->dcache_obj_id ); + } else { + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + fd_topob_link( topo, "snapin_ct", "snapin_ct", 128UL, 0UL, 1UL ); + } else { + FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); + /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); + /**/ fd_topob_link( topo, "snapls_ct", "snapls_ct", 128UL, 0UL, 1UL ); + } } - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( vinyl_enabled ) { + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_ct", 0UL ); + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapwm_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + /* TODO pending */ + } } else { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, 
FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } } + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapld_dc", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapct", 0UL, "snapct_ld", 0UL ); fd_topob_tile_out( topo, "snapct", 0UL, "snapct_repr", 0UL ); @@ -227,29 +253,35 @@ backtest_topo( config_t * config ) { fd_topob_tile_in ( topo, "snapdc", 0UL, "metric_in", "snapld_dc", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapdc", 0UL, "snapdc_in", 0UL ); fd_topob_tile_in ( topo, "snapin", 0UL, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); - } else { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); - } fd_topob_tile_out( topo, "snapin", 0UL, "snapin_manif", 0UL ); fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "snapin_manif", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); - /**/ fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); - } - if( vinyl_enabled ) { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wh", 0UL ); - fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapin_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( FD_LIKELY( !snapshot_lthash_disabled ) ) { + /* TODO pending */ + } + fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wm", 0UL ); + fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_wm", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + 
fd_topob_tile_out( topo, "snapin", 0UL, "snapin_txn",0UL ); + fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_txn",0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_wh", 0UL ); + fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapwm_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapwh", 0UL, "snapwh_wr", 0UL ); - fd_topob_tile_in ( topo, "snapwr", 0UL, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - fd_topob_tile_uses( topo, snapwr_tile, &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapin_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); + FOR(snapwr_tile_cnt) fd_topob_tile_in ( topo, "snapwr", i, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(snapwr_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "snapwr", i ) ], &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapwm_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); + + } else { + if( FD_LIKELY( !snapshot_lthash_disabled ) ) { + FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); + /**/ fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); + } else { + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); + } } } else { fd_topob_wksp( topo, "genesi_out" ); diff --git a/src/app/firedancer-dev/commands/snapshot_load.c b/src/app/firedancer-dev/commands/snapshot_load.c index 636e3b446ed..376d3310a24 100644 --- 
a/src/app/firedancer-dev/commands/snapshot_load.c +++ b/src/app/firedancer-dev/commands/snapshot_load.c @@ -51,6 +51,7 @@ snapshot_load_topo( config_t * config, int snapshot_lthash_disabled = config->development.snapshots.disable_lthash_verification; ulong lta_tile_cnt = config->firedancer.layout.snapla_tile_count; + ulong snapwr_tile_cnt = config->firedancer.layout.snapwr_tile_count; if( config->firedancer.vinyl.enabled ) { setup_topo_vinyl_meta( topo, &config->firedancer ); @@ -64,6 +65,8 @@ snapshot_load_topo( config_t * config, server_wksp->min_loose_sz = 64UL<<20; } +#define FOR(cnt) for( ulong i=0UL; ifiredancer.vinyl.enabled; - fd_topo_tile_t * snapwr_tile = NULL; if( vinyl_enabled ) { + + fd_topob_wksp( topo, "snapwm" ); + fd_topo_tile_t * snapwm_tile = fd_topob_tile( topo, "snapwm", "snapwm", "metric_in", ULONG_MAX, 0, 0 ); + snapwm_tile->allow_shutdown = 1; + fd_topob_wksp( topo, "snapwh" ); fd_topo_tile_t * snapwh_tile = fd_topob_tile( topo, "snapwh", "snapwh", "metric_in", ULONG_MAX, 0, 0 ); snapwh_tile->allow_shutdown = 1; fd_topob_wksp( topo, "snapwr" ); - snapwr_tile = fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", ULONG_MAX, 0, 0 ); - snapwr_tile->allow_shutdown = 1; + FOR(snapwr_tile_cnt) fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", ULONG_MAX, 0, 0 )->allow_shutdown = 1; } fd_topob_wksp( topo, "snapct_ld" ); fd_topob_wksp( topo, "snapld_dc" ); fd_topob_wksp( topo, "snapdc_in" ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, "snapin_ct" ); - } + fd_topob_wksp( topo, "snapin_manif" ); fd_topob_wksp( topo, "snapct_repr" ); if( vinyl_enabled ) { - fd_topob_wksp( topo, "snapin_wr" ); - } - - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, "snapla" ); - fd_topob_wksp( topo, "snapls" ); - fd_topob_wksp( topo, "snapla_ls" ); - fd_topob_wksp( topo, "snapin_ls" ); - fd_topob_wksp( topo, "snapls_ct" ); + fd_topob_wksp( topo, "snapin_txn"); + fd_topob_wksp( topo, "snapin_wm" ); + fd_topob_wksp( 
topo, "snapwm_wr" ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapwm_ct" ); + } else { + /* TODO pending */ + } + } else { + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapin_ct" ); + } else { + fd_topob_wksp( topo, "snapla" ); + fd_topob_wksp( topo, "snapls" ); + fd_topob_wksp( topo, "snapla_ls" ); + fd_topob_wksp( topo, "snapin_ls" ); + fd_topob_wksp( topo, "snapls_ct" ); + } } -#define FOR(cnt) for( ulong i=0UL; iallow_shutdown = 1; /**/ fd_topob_tile( topo, "snapls", "snapls", "metric_in", ULONG_MAX, 0, 0 )->allow_shutdown = 1; @@ -132,27 +143,43 @@ snapshot_load_topo( config_t * config, fd_topob_link( topo, "snapct_ld", "snapct_ld", 128UL, sizeof(fd_ssctrl_init_t), 1UL ); fd_topob_link( topo, "snapld_dc", "snapld_dc", 16384UL, USHORT_MAX, 1UL ); fd_topob_link( topo, "snapdc_in", "snapdc_in", 16384UL, USHORT_MAX, 1UL ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_link( topo, "snapin_ct", "snapin_ct", 128UL, 0UL, 1UL ); - } fd_topob_link( topo, "snapin_manif", "snapin_manif", 4UL, sizeof(fd_snapshot_manifest_t), 1UL )->permit_no_consumers = 1; fd_topob_link( topo, "snapct_repr", "snapct_repr", 128UL, 0UL, 1UL )->permit_no_consumers = 1; if( vinyl_enabled ) { - fd_topo_link_t * snapin_wh = fd_topob_link( topo, "snapin_wh", "snapin_wr", 4UL, 16UL<<20, 1UL ); - fd_topob_link( topo, "snapwh_wr", "snapin_wr", 4UL, 0UL, 1UL ); - fd_pod_insertf_ulong( topo->props, 8UL, "obj.%lu.app_sz", snapin_wh->dcache_obj_id ); - } - - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); - /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); - /**/ fd_topob_link( topo, "snapls_ct", "snapls_ct", 128UL, 0UL, 1UL ); + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + fd_topob_link( topo, "snapwm_ct", "snapwm_ct", 128UL, 0UL, 1UL ); + } else { + /* TODO pending 
*/ + } + fd_topob_link( topo, "snapin_txn", "snapin_txn", 4UL, (ulong)((3764697600UL+64UL)/4), 1UL ); /* mtu=(sizeof(fd_sstxncache_entry_t)*(FD_SNAPIN_TXNCACHE_MAX_ENTRIES+1UL))/depth */ + fd_topob_link( topo, "snapin_wm", "snapin_wm", 16UL, 128UL<<20, 1UL ); /* FD_SSPARSE_ACC_BATCH_MAX * 16<<20 */ + fd_topo_link_t * snapwm_wh = + fd_topob_link( topo, "snapwm_wh", "snapwm_wr", 16UL, 16UL<<20, 1UL ); + fd_topob_link( topo, "snapwh_wr", "snapwm_wr", 16UL, 0UL, 1UL ); + fd_pod_insertf_ulong( topo->props, 8UL, "obj.%lu.app_sz", snapwm_wh->dcache_obj_id ); + } else { + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + fd_topob_link( topo, "snapin_ct", "snapin_ct", 128UL, 0UL, 1UL ); + } else { + FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); + /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); + /**/ fd_topob_link( topo, "snapls_ct", "snapls_ct", 128UL, 0UL, 1UL ); + } } - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( vinyl_enabled ) { + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_ct", 0UL ); + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapwm_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + /* TODO pending */ + } } else { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } } fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapld_dc", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapct", 0UL, 
"snapct_ld", 0UL ); @@ -162,25 +189,32 @@ snapshot_load_topo( config_t * config, fd_topob_tile_in ( topo, "snapdc", 0UL, "metric_in", "snapld_dc", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapdc", 0UL, "snapdc_in", 0UL ); fd_topob_tile_in ( topo, "snapin", 0UL, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); - } else { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); - } fd_topob_tile_out( topo, "snapin", 0UL, "snapin_manif", 0UL ); if( vinyl_enabled ) { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wh", 0UL ); - fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapin_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( FD_LIKELY( !snapshot_lthash_disabled ) ) { + /* TODO pending */ + } + fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wm", 0UL ); + fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_wm", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + fd_topob_tile_out( topo, "snapin", 0UL, "snapin_txn",0UL ); + fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_txn",0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_wh", 0UL ); + fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapwm_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapwh", 0UL, "snapwh_wr", 0UL ); - fd_topob_tile_in ( topo, "snapwr", 0UL, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - fd_topob_tile_uses( topo, snapwr_tile, &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapin_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); - } - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); - /**/ 
fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); + FOR(snapwr_tile_cnt) fd_topob_tile_in ( topo, "snapwr", i, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(snapwr_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "snapwr", i ) ], &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapwm_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); + + } else { + if( FD_LIKELY( !snapshot_lthash_disabled ) ) { + FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); + /**/ fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_in ( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); + } else { + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); + } } /* snapin funk / txncache access */ @@ -634,8 +668,10 @@ snapshot_load_cmd_fn( args_t * args, fd_topo_tile_t * snapld_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapld", 0UL ) ]; fd_topo_tile_t * snapdc_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapdc", 0UL ) ]; fd_topo_tile_t * snapin_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapin", 0UL ) ]; + ulong snapwm_idx = fd_topo_find_tile( topo, "snapwm", 0UL ); ulong snapwh_idx = fd_topo_find_tile( topo, "snapwh", 0UL ); ulong snapwr_idx = fd_topo_find_tile( topo, "snapwr", 0UL ); + fd_topo_tile_t * snapwm_tile = snapwm_idx!=ULONG_MAX ? 
&topo->tiles[ snapwm_idx ] : NULL; fd_topo_tile_t * snapwh_tile = snapwh_idx!=ULONG_MAX ? &topo->tiles[ snapwh_idx ] : NULL; fd_topo_tile_t * snapwr_tile = snapwr_idx!=ULONG_MAX ? &topo->tiles[ snapwr_idx ] : NULL; ulong snapla_idx = fd_topo_find_tile( topo, "snapla", 0UL ); @@ -653,6 +689,7 @@ snapshot_load_cmd_fn( args_t * args, ulong volatile * const snapld_metrics = fd_metrics_tile( snapld_tile->metrics ); ulong volatile * const snapdc_metrics = fd_metrics_tile( snapdc_tile->metrics ); ulong volatile * const snapin_metrics = fd_metrics_tile( snapin_tile->metrics ); + ulong volatile * const snapwm_metrics = snapwm_tile ? fd_metrics_tile( snapwm_tile->metrics ) : NULL; ulong volatile * const snapwh_metrics = snapwh_tile ? fd_metrics_tile( snapwh_tile->metrics ) : NULL; ulong volatile * const snapwr_metrics = snapwr_tile ? fd_metrics_tile( snapwr_tile->metrics ) : NULL; ulong volatile * const snapla_metrics = snapla_tile ? fd_metrics_tile( snapla_tile->metrics ) : NULL; @@ -667,6 +704,9 @@ snapshot_load_cmd_fn( args_t * args, ulong snapdc_wait_old = 0UL; ulong snapin_backp_old = 0UL; ulong snapin_wait_old = 0UL; + ulong snapwm_backp_old = 0UL; + ulong snapwm_wait_old = 0UL; + ulong snapwh_backp_old = 0UL; ulong snapwh_wait_old = 0UL; ulong snapwr_wait_old = 0UL; ulong snapla_backp_old = 0UL; @@ -686,10 +726,10 @@ snapshot_load_cmd_fn( args_t * args, puts( "- acc: Number of accounts" ); puts( "" ); fputs( "--------------------------------------------", stdout ); - if( snapwr_tile ) fputs( "--------------", stdout ); - if( snapls_tile ) fputs( "[ld],[dc],[in],[la],[ls]--------[ld],[dc],[in],[la],[ls]", stdout ); - else fputs( "[ld],[dc],[in]--------[ld],[dc],[in]", stdout ); - if( snapwr_tile ) fputs( ",[wh],[wr]" , stdout ); + if( snapwr_tile ) fputs( "--------------", stdout ); + if( snapls_tile ) fputs( "[ld],[dc],[in],[la],[ls]--------[ld],[dc],[in],[la],[ls]", stdout ); + else if( snapwr_tile ) fputs( 
"[ld],[dc],[in],[wm],[wh]--------[ld],[dc],[in],[wm],[wh],[wr]", stdout ); + else fputs( "[ld],[dc],[in]--------[ld],[dc],[in]", stdout ); puts( "--------------" ); } @@ -721,17 +761,23 @@ snapshot_load_cmd_fn( args_t * args, ulong snapdc_wait = snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapdc_backp; ulong snapin_backp = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; ulong snapin_wait = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapin_backp; + ulong snapwm_backp = 0UL; + ulong snapwm_wait = 0UL; + ulong snapwh_backp = 0UL; ulong snapwh_wait = 0UL; + ulong snapwr_backp = 0UL; ulong snapwr_wait = 0UL; ulong snapla_backp = snapla_metrics ? snapla_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ] : 0UL; ulong snapla_wait = snapla_metrics ? snapla_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapla_backp : 0UL; ulong snapls_backp = snapls_metrics ? snapls_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ] : 0UL; ulong snapls_wait = snapls_metrics ? 
snapls_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapls_backp : 0UL; if( snapwr_tile ) { - snapwh_wait = snapwh_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + - snapwh_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; - snapwr_wait = snapwr_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + - snapwr_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; + snapwm_backp = snapwm_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; + snapwm_wait = snapwm_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapwm_backp; + snapwh_backp = snapwh_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; + snapwh_wait = snapwh_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapwh_backp; + snapwr_backp = snapwr_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ]; + snapwr_wait = snapwr_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapwr_backp; } double progress = 100.0 * (double)snapct_metrics[ MIDX( GAUGE, SNAPCT, FULL_BYTES_READ ) ] / (double)snapct_metrics[ MIDX( GAUGE, SNAPCT, FULL_BYTES_TOTAL ) ]; @@ -746,39 +792,39 @@ snapshot_load_cmd_fn( args_t * args, if( snapwr_tile ) { printf( " vinyl=%4.0fMB/s", (double)( vinyl_off - vinyl_off_old )/1e6 ); } - if( !snapls_tile ) { - printf( " backp=(%3.0f%%,%3.0f%%,%3.0f%%", + + printf( " backp=(%3.0f%%,%3.0f%%,%3.0f%%", ( (double)( snapld_backp-snapld_backp_old )*ns_per_tick )/1e7, ( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7, ( (double)( snapin_backp-snapin_backp_old )*ns_per_tick )/1e7 ); - } - else { - printf( " backp=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%", - ( (double)( snapld_backp-snapld_backp_old )*ns_per_tick )/1e7, - ( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7, - ( (double)( 
snapin_backp-snapin_backp_old )*ns_per_tick )/1e7, + if( snapls_tile ) { + printf( ",%3.0f%%,%3.0f%%", ( (double)( snapla_backp-snapla_backp_old )*ns_per_tick )/1e7, ( (double)( snapls_backp-snapls_backp_old )*ns_per_tick )/1e7 ); + } else if( snapwr_tile ) { + printf( ",%3.0f%%,%3.0f%%", + ( (double)( snapwm_backp-snapwm_backp_old )*ns_per_tick )/1e7, + ( (double)( snapwh_backp-snapwh_backp_old )*ns_per_tick )/1e7 ); } - if( !snapls_tile ) { - printf( ") busy=(%3.0f%%,%3.0f%%,%3.0f%%", - 100-( ( (double)( snapld_wait-snapld_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ) ); - } else { - printf( ") busy=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%", - 100-( ( (double)( snapld_wait-snapld_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapla_wait-snapla_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapls_wait-snapls_wait_old )*ns_per_tick )/1e7 ) ); - } - if( snapwr_tile ) { + printf( ")" ); + + printf( " busy=(%3.0f%%,%3.0f%%,%3.0f%%", + 100-( ( (double)( snapld_wait-snapld_wait_old )*ns_per_tick )/1e7 ), + 100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ), + 100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ) ); + if( snapls_tile ) { printf( ",%3.0f%%,%3.0f%%", - 100-( ( (double)( snapwh_wait-snapwh_wait_old )*ns_per_tick )/1e7 ), - 100-( ( (double)( snapwr_wait-snapwr_wait_old )*ns_per_tick )/1e7 ) ); + 100-( ( (double)( snapla_wait-snapla_wait_old )*ns_per_tick )/1e7 ), + 100-( ( (double)( snapls_wait-snapls_wait_old )*ns_per_tick )/1e7 ) ); + } else if( snapwr_tile ) { + printf( ",%3.0f%%,%3.0f%%,%3.0f%%", + 100-( ( (double)( snapwm_wait-snapwm_wait_old )*ns_per_tick )/1e7 ), + 100-( ( (double)( snapwh_wait-snapwh_wait_old )*ns_per_tick )/1e7 ), + 100-( ( 
(double)( snapwr_wait-snapwr_wait_old )*ns_per_tick )/1e7 ) ); } - printf( ") acc=%4.1f M/s\n", + printf( ")" ); + + printf( " acc=%4.1f M/s\n", (double)( acc_cnt-acc_cnt_old )/1e6 ); fflush( stdout ); } @@ -791,6 +837,9 @@ snapshot_load_cmd_fn( args_t * args, snapdc_wait_old = snapdc_wait; snapin_backp_old = snapin_backp; snapin_wait_old = snapin_wait; + snapwm_backp_old = snapwm_backp; + snapwm_wait_old = snapwm_wait; + snapwh_backp_old = snapwh_backp; snapwh_wait_old = snapwh_wait; snapwr_wait_old = snapwr_wait; snapla_backp_old = snapla_backp; diff --git a/src/app/firedancer-dev/main.c b/src/app/firedancer-dev/main.c index 58c2772a51e..7ddefde9fa9 100644 --- a/src/app/firedancer-dev/main.c +++ b/src/app/firedancer-dev/main.c @@ -114,6 +114,7 @@ extern fd_topo_run_tile_t fd_tile_snapct; extern fd_topo_run_tile_t fd_tile_snapld; extern fd_topo_run_tile_t fd_tile_snapdc; extern fd_topo_run_tile_t fd_tile_snapin; +extern fd_topo_run_tile_t fd_tile_snapwm; extern fd_topo_run_tile_t fd_tile_snapwh; extern fd_topo_run_tile_t fd_tile_snapwr; extern fd_topo_run_tile_t fd_tile_snapla; @@ -162,6 +163,7 @@ fd_topo_run_tile_t * TILES[] = { &fd_tile_snapld, &fd_tile_snapdc, &fd_tile_snapin, + &fd_tile_snapwm, &fd_tile_snapwh, &fd_tile_snapwr, &fd_tile_snapla, diff --git a/src/app/firedancer/config/default.toml b/src/app/firedancer/config/default.toml index 62a32dd534d..96c7cee049d 100644 --- a/src/app/firedancer/config/default.toml +++ b/src/app/firedancer/config/default.toml @@ -809,6 +809,13 @@ user = "" # error. snapla_tile_count = 4 + # How many snapshot vinyl-based write tiles to run. Depending on + # the specific hardware configuration, multiple snapwr tiles may + # be required to achieve high throughput. It is also possible to + # tradeoff throughput and tile count. This value is only valid + # when vinyl is enabled. + snapwr_tile_count = 2 + # All memory that will be used in Firedancer is pre-allocated in two # kinds of pages: huge and gigantic. 
Huge pages are 2 MiB and gigantic # pages are 1 GiB. This is done to prevent TLB misses which can have a diff --git a/src/app/firedancer/topology.c b/src/app/firedancer/topology.c index 23100fa5482..5810642e5eb 100644 --- a/src/app/firedancer/topology.c +++ b/src/app/firedancer/topology.c @@ -335,6 +335,7 @@ fd_topo_initialize( config_t * config ) { ulong exec_tile_cnt = config->firedancer.layout.exec_tile_count; ulong sign_tile_cnt = config->firedancer.layout.sign_tile_count; ulong lta_tile_cnt = config->firedancer.layout.snapla_tile_count; + ulong snapwr_tile_cnt = config->firedancer.layout.snapwr_tile_count; int snapshots_enabled = !!config->gossip.entrypoints_cnt; int vinyl_enabled = !!config->firedancer.vinyl.enabled; @@ -437,6 +438,7 @@ fd_topo_initialize( config_t * config ) { fd_topob_wksp( topo, "snapdc" ); fd_topob_wksp( topo, "snapin" ); if( vinyl_enabled ) { + fd_topob_wksp( topo, "snapwm" ); fd_topob_wksp( topo, "snapwh" ); fd_topob_wksp( topo, "snapwr" ); } @@ -444,25 +446,31 @@ fd_topo_initialize( config_t * config ) { fd_topob_wksp( topo, "snapct_ld" ); fd_topob_wksp( topo, "snapld_dc" ); fd_topob_wksp( topo, "snapdc_in" ); - if( vinyl_enabled ) fd_topob_wksp( topo, "snapin_wr" ); - - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, "snapin_ct" ); + if( vinyl_enabled ) { + fd_topob_wksp( topo, "snapin_txn"); + fd_topob_wksp( topo, "snapin_wm" ); + fd_topob_wksp( topo, "snapwm_wr" ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapwm_ct" ); + } else { + /* TODO pending */ + } } else { - fd_topob_wksp( topo, "snapls_ct" ); + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_wksp( topo, "snapin_ct" ); + } else { + fd_topob_wksp( topo, "snapla" ); + fd_topob_wksp( topo, "snapls" ); + fd_topob_wksp( topo, "snapla_ls" ); + fd_topob_wksp( topo, "snapin_ls" ); + fd_topob_wksp( topo, "snapls_ct" ); + } } if( FD_LIKELY( config->tiles.gui.enabled ) ) fd_topob_wksp( topo, "snapct_gui" ); if( 
FD_LIKELY( config->tiles.gui.enabled ) ) fd_topob_wksp( topo, "snapin_gui" ); fd_topob_wksp( topo, "snapin_manif" ); fd_topob_wksp( topo, "snapct_repr" ); - - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - fd_topob_wksp( topo, "snapla" ); - fd_topob_wksp( topo, "snapls" ); - fd_topob_wksp( topo, "snapla_ls" ); - fd_topob_wksp( topo, "snapin_ls" ); - } } #define FOR(cnt) for( ulong i=0UL; ipermit_no_consumers = 1; /* TODO: wire in repair later */ if( FD_LIKELY( config->tiles.gui.enabled ) ) { @@ -493,14 +497,25 @@ fd_topo_initialize( config_t * config ) { /**/ fd_topob_link( topo, "snapin_gui", "snapin_gui", 128UL, FD_GUI_CONFIG_PARSE_MAX_VALID_ACCT_SZ_WITH_NULL, 1UL ); } if( vinyl_enabled ) { - fd_topo_link_t * snapin_wh = - /**/ fd_topob_link( topo, "snapin_wh", "snapin_wr", 4UL, 16UL<<20, 1UL ); - /**/ fd_topob_link( topo, "snapwh_wr", "snapin_wr", 4UL, 0UL, 1UL ); - fd_pod_insertf_ulong( topo->props, 8UL, "obj.%lu.app_sz", snapin_wh->dcache_obj_id ); - } - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); - /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + /**/ fd_topob_link( topo, "snapwm_ct", "snapwm_ct", 128UL, 0UL, 1UL ); + } else { + /* TODO pending */ + } + /**/ fd_topob_link( topo, "snapin_txn", "snapin_txn", 4UL, (ulong)((3764697600UL+64UL)/4), 1UL ); /* mtu=(sizeof(fd_sstxncache_entry_t)*(FD_SNAPIN_TXNCACHE_MAX_ENTRIES+1UL))/depth */ + /**/ fd_topob_link( topo, "snapin_wm", "snapin_wm", 256UL, 16UL<<20, 16UL ); + fd_topo_link_t * snapwm_wh = + /**/ fd_topob_link( topo, "snapwm_wh", "snapwm_wr", 4UL, 16UL<<20, 1UL ); + /**/ fd_topob_link( topo, "snapwh_wr", "snapwm_wr", 4UL, 0UL, 1UL ); + fd_pod_insertf_ulong( topo->props, 8UL, "obj.%lu.app_sz", snapwm_wh->dcache_obj_id ); + } else { + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + /**/ 
fd_topob_link( topo, "snapin_ct", "snapin_ct", 128UL, 0UL, 1UL ); + } else { + FOR(lta_tile_cnt) fd_topob_link( topo, "snapla_ls", "snapla_ls", 128UL, sizeof(fd_lthash_value_t), 1UL ); + /**/ fd_topob_link( topo, "snapin_ls", "snapin_ls", 256UL, sizeof(fd_snapshot_full_account_t), 1UL ); + /**/ fd_topob_link( topo, "snapls_ct", "snapls_ct", 128UL, 0UL, 1UL ); + } } } @@ -590,12 +605,18 @@ fd_topo_initialize( config_t * config ) { /**/ fd_topob_tile( topo, "snapld", "snapld", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; /**/ fd_topob_tile( topo, "snapdc", "snapdc", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; /**/ fd_topob_tile( topo, "snapin", "snapin", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; + if(vinyl_enabled) fd_topob_tile( topo, "snapwm", "snapwm", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; if(vinyl_enabled) fd_topob_tile( topo, "snapwh", "snapwh", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; - if(vinyl_enabled) fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; + if(vinyl_enabled) FOR(snapwr_tile_cnt) + fd_topob_tile( topo, "snapwr", "snapwr", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_tile( topo, "snapla", "snapla", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; - /**/ fd_topob_tile( topo, "snapls", "snapls", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; + if(vinyl_enabled) { + /* TODO pending*/ + } else { + FOR(lta_tile_cnt) fd_topob_tile( topo, "snapla", "snapla", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; + /**/ fd_topob_tile( topo, "snapls", "snapls", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 )->allow_shutdown = 1; + } } } @@ -667,11 +688,6 @@ fd_topo_initialize( 
config_t * config ) { /**/ fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "gossip_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); } - if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - } else { - fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - } fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapld_dc", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); fd_topob_tile_out( topo, "snapct", 0UL, "snapct_ld", 0UL ); fd_topob_tile_out( topo, "snapct", 0UL, "snapct_repr", 0UL ); @@ -679,13 +695,35 @@ fd_topo_initialize( config_t * config ) { /**/ fd_topob_tile_out( topo, "snapct", 0UL, "snapct_gui", 0UL ); } if( vinyl_enabled ) { - /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wh", 0UL ); - /**/ fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapin_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + if( FD_LIKELY( snapshot_lthash_disabled ) ) { + /**/ fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_ct", 0UL ); + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapwm_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + /* TODO pending */ + } + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_wm", 0UL ); + /**/ fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_wm", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_txn", 0UL ); + /**/ fd_topob_tile_in ( topo, "snapwm", 0UL, "metric_in", "snapin_txn", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_out( topo, "snapwm", 0UL, "snapwm_wh", 0UL ); + /**/ fd_topob_tile_in ( topo, "snapwh", 0UL, "metric_in", "snapwm_wh", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); /**/ fd_topob_tile_out( topo, "snapwh", 0UL, "snapwh_wr", 0UL ); - /**/ fd_topob_tile_in ( topo, "snapwr", 0UL, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - 
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "snapwr", 0UL ) ], &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapin_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); + FOR(snapwr_tile_cnt) fd_topob_tile_in(topo, "snapwr", i, "metric_in", "snapwh_wr", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(snapwr_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "snapwr", i ) ], &topo->objs[ topo->links[ fd_topo_find_link( topo, "snapwm_wh", 0UL ) ].dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY ); + } else { + if( FD_UNLIKELY( snapshot_lthash_disabled ) ) { + fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); + fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapin_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } else { + fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); + FOR(lta_tile_cnt) fd_topob_tile_in( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); + /**/ fd_topob_tile_in( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + FOR(lta_tile_cnt) fd_topob_tile_in( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); + /**/ fd_topob_tile_in ( topo, "snapct", 0UL, "metric_in", "snapls_ct", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + } } - /**/ fd_topob_tile_in ( topo, "snapld", 0UL, "metric_in", "snapct_ld", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); /**/ fd_topob_tile_out( topo, "snapld", 0UL, "snapld_dc", 0UL ); @@ -695,20 +733,8 @@ fd_topo_initialize( config_t * config ) { fd_topob_tile_in ( topo, "snapin", 0UL, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); if( FD_LIKELY( config->tiles.gui.enabled ) ) { /**/ fd_topob_tile_out( topo, "snapin", 0UL, "snapin_gui", 0UL ); - } - if( FD_UNLIKELY( 
snapshot_lthash_disabled ) ) { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ct", 0UL ); - } else { - fd_topob_tile_out( topo, "snapin", 0UL, "snapin_ls", 0UL ); } fd_topob_tile_out( topo, "snapin", 0UL, "snapin_manif", 0UL ); - if( FD_LIKELY( !snapshot_lthash_disabled ) ) { - FOR(lta_tile_cnt) fd_topob_tile_in( topo, "snapla", i, "metric_in", "snapdc_in", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_out( topo, "snapla", i, "snapla_ls", i ); - /**/ fd_topob_tile_in( topo, "snapls", 0UL, "metric_in", "snapin_ls", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - FOR(lta_tile_cnt) fd_topob_tile_in( topo, "snapls", 0UL, "metric_in", "snapla_ls", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); - /**/ fd_topob_tile_out( topo, "snapls", 0UL, "snapls_ct", 0UL ); - } } /**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "genesi_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); @@ -1196,26 +1222,29 @@ fd_topo_configure_tile( fd_topo_tile_t * tile, tile->snapin.use_vinyl = !!config->firedancer.vinyl.enabled; tile->snapin.lthash_disabled = !!config->development.snapshots.disable_lthash_verification; - if( tile->snapin.use_vinyl ) { - strcpy( tile->snapin.vinyl_path, config->paths.accounts ); - tile->snapin.vinyl_meta_map_obj_id = fd_pod_query_ulong( config->topo.props, "vinyl.meta_map", ULONG_MAX ); - tile->snapin.vinyl_meta_pool_obj_id = fd_pod_query_ulong( config->topo.props, "vinyl.meta_pool", ULONG_MAX ); - - ulong in_wr_link_id = fd_topo_find_link( &config->topo, "snapin_wh", 0UL ); - FD_TEST( in_wr_link_id!=ULONG_MAX ); - fd_topo_link_t * in_wr_link = &config->topo.links[ in_wr_link_id ]; - tile->snapin.snapwr_depth = in_wr_link->depth; - } + + } else if( FD_UNLIKELY( !strcmp( tile->name, "snapwm" ) ) ) { + + tile->snapwm.lthash_disabled = !!config->development.snapshots.disable_lthash_verification; + strcpy( tile->snapwm.vinyl_path, config->paths.accounts ); + tile->snapwm.vinyl_meta_map_obj_id = fd_pod_query_ulong( config->topo.props, 
"vinyl.meta_map", ULONG_MAX ); + tile->snapwm.vinyl_meta_pool_obj_id = fd_pod_query_ulong( config->topo.props, "vinyl.meta_pool", ULONG_MAX ); + + ulong wm_wr_link_id = fd_topo_find_link( &config->topo, "snapwm_wh", 0UL ); + FD_TEST( wm_wr_link_id!=ULONG_MAX ); + fd_topo_link_t * wm_wr_link = &config->topo.links[ wm_wr_link_id ]; + tile->snapwm.snapwr_depth = wm_wr_link->depth; } else if( FD_UNLIKELY( !strcmp( tile->name, "snapwh" ) ) ) { } else if( FD_UNLIKELY( !strcmp( tile->name, "snapwr" ) ) ) { strcpy( tile->snapwr.vinyl_path, config->paths.accounts ); - ulong in_wr_link_id = fd_topo_find_link( &config->topo, "snapin_wh", 0UL ); - FD_TEST( in_wr_link_id!=ULONG_MAX ); - fd_topo_link_t * in_wr_link = &config->topo.links[ in_wr_link_id ]; - tile->snapwr.dcache_obj_id = in_wr_link->dcache_obj_id; + ulong wm_wr_link_id = fd_topo_find_link( &config->topo, "snapwm_wh", 0UL ); + FD_TEST( wm_wr_link_id!=ULONG_MAX ); + fd_topo_link_t * wm_wr_link = &config->topo.links[ wm_wr_link_id ]; + tile->snapwr.dcache_obj_id = wm_wr_link->dcache_obj_id; + } else if( FD_UNLIKELY( !strcmp( tile->name, "snapla" ) ) ) { } else if( FD_UNLIKELY( !strcmp( tile->name, "snapls" ) ) ) { diff --git a/src/app/shared/fd_config.c b/src/app/shared/fd_config.c index 4ddb44ab586..61118d06923 100644 --- a/src/app/shared/fd_config.c +++ b/src/app/shared/fd_config.c @@ -467,6 +467,7 @@ static void fd_config_validatef( fd_configf_t const * config ) { CFG_HAS_NON_ZERO( layout.sign_tile_count ); CFG_HAS_NON_ZERO( layout.snapla_tile_count ); + CFG_HAS_NON_ZERO( layout.snapwr_tile_count ); if( FD_UNLIKELY( config->layout.sign_tile_count < 2 ) ) { FD_LOG_ERR(( "layout.sign_tile_count must be >= 2" )); } diff --git a/src/app/shared/fd_config.h b/src/app/shared/fd_config.h index 561c3129162..f290dbe1a2f 100644 --- a/src/app/shared/fd_config.h +++ b/src/app/shared/fd_config.h @@ -119,6 +119,7 @@ struct fd_configf { uint sign_tile_count; uint gossvf_tile_count; uint snapla_tile_count; + uint snapwr_tile_count; 
} layout; struct { diff --git a/src/app/shared/fd_config_parse.c b/src/app/shared/fd_config_parse.c index 8598410de95..24baf0c73a9 100644 --- a/src/app/shared/fd_config_parse.c +++ b/src/app/shared/fd_config_parse.c @@ -83,6 +83,7 @@ fd_config_extract_podf( uchar * pod, CFG_POP ( uint, layout.sign_tile_count ); CFG_POP ( uint, layout.gossvf_tile_count ); CFG_POP ( uint, layout.snapla_tile_count ); + CFG_POP ( uint, layout.snapwr_tile_count ); CFG_POP ( ulong, funk.max_account_records ); CFG_POP ( ulong, funk.heap_size_gib ); diff --git a/src/disco/topo/fd_topo.h b/src/disco/topo/fd_topo.h index eed8dc4411f..07791deae97 100644 --- a/src/disco/topo/fd_topo.h +++ b/src/disco/topo/fd_topo.h @@ -567,11 +567,15 @@ struct fd_topo_tile { uint lthash_disabled : 1; uint use_vinyl : 1; + } snapin; + + struct { + uint lthash_disabled : 1; ulong vinyl_meta_map_obj_id; ulong vinyl_meta_pool_obj_id; ulong snapwr_depth; char vinyl_path[ PATH_MAX ]; - } snapin; + } snapwm; struct { ulong dcache_obj_id; diff --git a/src/disco/topo/fd_topob.c b/src/disco/topo/fd_topob.c index 956a802c430..385d9d89963 100644 --- a/src/disco/topo/fd_topob.c +++ b/src/disco/topo/fd_topob.c @@ -385,6 +385,7 @@ fd_topob_auto_layout( fd_topo_t * topo, "snapld", /* FIREDANCER only */ "snapdc", /* FIREDANCER only */ "snapin", /* FIREDANCER only */ + "snapwm", /* FIREDANCER only */ "snapwh", /* FIREDANCER only */ "snapla", /* FIREDANCER only */ "snapls", /* FIREDANCER only */ @@ -397,8 +398,11 @@ fd_topob_auto_layout( fd_topo_t * topo, "pack", "poh", "gui", + "snapld", /* TODO: Snapshot loading speed depends on having full core */ "snapdc", /* TODO: Snapshot loading speed depends on having full core */ "snapin", /* TODO: Snapshot loading speed depends on having full core */ + "snapwm", /* TODO: Snapshot loading speed depends on having full core */ + "snapwh", /* TODO: Snapshot loading speed depends on having full core */ }; for( ulong i=0UL; itile_cnt; i++ ) { diff --git a/src/discof/restore/Local.mk 
b/src/discof/restore/Local.mk index ec6d8e6a8a4..43e86ea7aef 100644 --- a/src/discof/restore/Local.mk +++ b/src/discof/restore/Local.mk @@ -7,6 +7,7 @@ ifdef FD_HAS_ZSTD $(call add-objs,fd_snapdc_tile,fd_discof) endif # FD_HAS_ZSTD $(call add-objs,fd_snapin_tile fd_snapin_tile_funk fd_snapin_tile_vinyl,fd_discof) +$(call add-objs,fd_snapwm_tile fd_snapwm_tile_vinyl,fd_discof) endif # FD_HAS_SSE $(call add-objs,fd_snapwh_tile,fd_discof) $(call add-objs,fd_snapwr_tile,fd_discof) diff --git a/src/discof/restore/fd_snapct_tile.c b/src/discof/restore/fd_snapct_tile.c index cfe0406859d..4603dc616f8 100644 --- a/src/discof/restore/fd_snapct_tile.c +++ b/src/discof/restore/fd_snapct_tile.c @@ -1354,7 +1354,7 @@ unprivileged_init( fd_topo_t * topo, ctx->snapld_in_mem = topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ].wksp; FD_TEST( !has_snapld_dc ); has_snapld_dc = 1; - } else if( 0==strcmp( in_link->name, "snapin_ct" ) || 0==strcmp( in_link->name, "snapls_ct" ) ) { + } else if( 0==strcmp( in_link->name, "snapin_ct" ) || 0==strcmp( in_link->name, "snapls_ct" ) || 0==strcmp( in_link->name, "snapwm_ct" ) ) { ctx->in_kind[ i ] = IN_KIND_ACK; FD_TEST( !has_ack_loopback ); has_ack_loopback = 1; diff --git a/src/discof/restore/fd_snapin_tile.c b/src/discof/restore/fd_snapin_tile.c index 12a68b45dee..ffda6893ffd 100644 --- a/src/discof/restore/fd_snapin_tile.c +++ b/src/discof/restore/fd_snapin_tile.c @@ -73,11 +73,9 @@ scratch_footprint( fd_topo_tile_t const * tile ) { l = FD_LAYOUT_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->snapin.max_live_slots ) ); l = FD_LAYOUT_APPEND( l, fd_ssmanifest_parser_align(), fd_ssmanifest_parser_footprint() ); l = FD_LAYOUT_APPEND( l, fd_slot_delta_parser_align(), fd_slot_delta_parser_footprint() ); - l = FD_LAYOUT_APPEND( l, alignof(fd_sstxncache_entry_t), sizeof(fd_sstxncache_entry_t)*FD_SNAPIN_TXNCACHE_MAX_ENTRIES ); - l = FD_LAYOUT_APPEND( l, alignof(blockhash_group_t), 
sizeof(blockhash_group_t)*FD_SNAPIN_MAX_SLOT_DELTA_GROUPS ); - if( tile->snapin.use_vinyl ) { - l = FD_LAYOUT_APPEND( l, fd_vinyl_io_wd_align(), fd_vinyl_io_wd_footprint( tile->snapin.snapwr_depth ) ); - l = FD_LAYOUT_APPEND( l, fd_vinyl_io_mm_align(), fd_vinyl_io_mm_footprint( FD_SNAPIN_IO_SPAD_MAX ) ); + l = FD_LAYOUT_APPEND( l, alignof(blockhash_group_t), sizeof(blockhash_group_t)*FD_SNAPIN_MAX_SLOT_DELTA_GROUPS ); + if( !tile->snapin.use_vinyl ) { + l = FD_LAYOUT_APPEND( l, alignof(fd_sstxncache_entry_t), sizeof(fd_sstxncache_entry_t)*FD_SNAPIN_TXNCACHE_MAX_ENTRIES ); } return FD_LAYOUT_FINI( l, scratch_align() ); } @@ -362,6 +360,10 @@ populate_txncache( fd_snapin_tile_t * ctx, fd_txncache_insert( ctx->txncache, banks[ 0UL ].fork_id, entry->blockhash, entry->txnhash ); } + if( !!ctx->use_vinyl && !!ctx->txncache_entries_len_vinyl_ptr ) { + memcpy( ctx->txncache_entries_len_vinyl_ptr, &ctx->txncache_entries_len, sizeof(ulong) ); + } + FD_LOG_INFO(( "inserted %lu/%lu transactions into the txncache", insert_cnt, ctx->txncache_entries_len )); /* Then finalize all the banks (freezing them) and setting the txnhash @@ -572,7 +574,7 @@ handle_control_frag( fd_snapin_tile_t * ctx, switch( sig ) { case FD_SNAPSHOT_MSG_CTRL_INIT_FULL: case FD_SNAPSHOT_MSG_CTRL_INIT_INCR: - fd_ssparse_batch_enable( ctx->ssparse, sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL ); + fd_ssparse_batch_enable( ctx->ssparse, ctx->use_vinyl || sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL ); FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE ); ctx->state = FD_SNAPSHOT_STATE_PROCESSING; ctx->full = sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL; @@ -584,12 +586,6 @@ handle_control_frag( fd_snapin_tile_t * ctx, fd_slot_delta_parser_init( ctx->slot_delta_parser ); fd_memset( &ctx->flags, 0, sizeof(ctx->flags) ); fd_memset( &ctx->vinyl_op, 0, sizeof(ctx->vinyl_op) ); - if( ctx->use_vinyl ) { - if( sig==FD_SNAPSHOT_MSG_CTRL_INIT_INCR ) { - fd_snapin_vinyl_txn_begin( ctx ); - } - fd_snapin_vinyl_wd_init( ctx ); - } break; case 
FD_SNAPSHOT_MSG_CTRL_FAIL: @@ -598,18 +594,11 @@ handle_control_frag( fd_snapin_tile_t * ctx, ctx->state==FD_SNAPSHOT_STATE_ERROR ); ctx->state = FD_SNAPSHOT_STATE_IDLE; - if( ctx->use_vinyl ) { - fd_snapin_vinyl_wd_fini( ctx ); - if( ctx->vinyl.txn_active ) { - fd_snapin_vinyl_txn_cancel( ctx ); - } + if( ctx->full ) { + fd_accdb_clear( ctx->accdb_admin ); } else { - if( ctx->full ) { - fd_accdb_clear( ctx->accdb_admin ); - } else { - fd_accdb_cancel( ctx->accdb_admin, ctx->xid ); - fd_funk_txn_xid_copy( ctx->xid, fd_funk_last_publish( ctx->accdb_admin->funk ) ); - } + fd_accdb_cancel( ctx->accdb_admin, ctx->xid ); + fd_funk_txn_xid_copy( ctx->xid, fd_funk_last_publish( ctx->accdb_admin->funk ) ); } break; @@ -623,13 +612,6 @@ handle_control_frag( fd_snapin_tile_t * ctx, } ctx->state = FD_SNAPSHOT_STATE_IDLE; - if( ctx->use_vinyl ) { - fd_snapin_vinyl_wd_fini( ctx ); - if( ctx->vinyl.txn_active ) { - fd_snapin_vinyl_txn_commit( ctx ); - } - } - fd_funk_txn_xid_t incremental_xid = { .ul={ LONG_MAX, LONG_MAX } }; fd_accdb_attach_child( ctx->accdb_admin, ctx->xid, &incremental_xid ); fd_funk_txn_xid_copy( ctx->xid, &incremental_xid ); @@ -646,19 +628,14 @@ handle_control_frag( fd_snapin_tile_t * ctx, } ctx->state = FD_SNAPSHOT_STATE_IDLE; - if( ctx->use_vinyl ) { - fd_snapin_vinyl_wd_fini( ctx ); - if( ctx->vinyl.txn_active ) { - fd_snapin_vinyl_txn_commit( ctx ); + if( !ctx->use_vinyl ) { + if( FD_UNLIKELY( verify_slot_deltas_with_slot_history( ctx ) ) ) { + FD_LOG_WARNING(( "slot deltas verification failed" )); + transition_malformed( ctx, stem ); + break; } } - if( FD_UNLIKELY( verify_slot_deltas_with_slot_history( ctx ) ) ) { - FD_LOG_WARNING(( "slot deltas verification failed" )); - transition_malformed( ctx, stem ); - break; - } - /* Publish any remaining funk txn */ if( FD_LIKELY( fd_funk_last_publish_is_frozen( ctx->accdb_admin->funk ) ) ) { fd_accdb_advance_root( ctx->accdb_admin, ctx->xid ); @@ -678,17 +655,10 @@ handle_control_frag( fd_snapin_tile_t * ctx, 
case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN: FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE ); ctx->state = FD_SNAPSHOT_STATE_SHUTDOWN; - if( ctx->use_vinyl ) fd_snapin_vinyl_shutdown( ctx ); break; case FD_SNAPSHOT_MSG_CTRL_ERROR: ctx->state = FD_SNAPSHOT_STATE_ERROR; - if( ctx->use_vinyl ) { - fd_snapin_vinyl_wd_fini( ctx ); - if( ctx->vinyl.txn_active ) { - fd_snapin_vinyl_txn_cancel( ctx ); - } - } break; default: @@ -744,7 +714,7 @@ populate_allowed_seccomp( fd_topo_t const * topo, struct sock_filter * out ) { (void)topo; if( tile->snapin.use_vinyl ) { - return fd_snapin_vinyl_seccomp( out_cnt, out ); + return 0; } else { populate_sock_filter_policy_fd_snapin_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() ); return sock_filter_policy_fd_snapin_tile_instr_cnt; @@ -797,13 +767,24 @@ unprivileged_init( fd_topo_t * topo, void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->snapin.max_live_slots ) ); void * _manifest_parser = FD_SCRATCH_ALLOC_APPEND( l, fd_ssmanifest_parser_align(), fd_ssmanifest_parser_footprint() ); void * _sd_parser = FD_SCRATCH_ALLOC_APPEND( l, fd_slot_delta_parser_align(), fd_slot_delta_parser_footprint() ); - ctx->txncache_entries = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sstxncache_entry_t), sizeof(fd_sstxncache_entry_t)*FD_SNAPIN_TXNCACHE_MAX_ENTRIES ); ctx->blockhash_offsets = FD_SCRATCH_ALLOC_APPEND( l, alignof(blockhash_group_t), sizeof(blockhash_group_t)*FD_SNAPIN_MAX_SLOT_DELTA_GROUPS ); - void * _io_wd = NULL; - void * _io_mm = NULL; - if( tile->snapin.use_vinyl ) { - _io_wd = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_io_wd_align(), fd_vinyl_io_wd_footprint( tile->snapin.snapwr_depth ) ); - _io_mm = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_io_mm_align(), fd_vinyl_io_mm_footprint( FD_SNAPIN_IO_SPAD_MAX ) ); + + if( !tile->snapin.use_vinyl ) { + ctx->txncache_entries = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sstxncache_entry_t), sizeof(fd_sstxncache_entry_t)*FD_SNAPIN_TXNCACHE_MAX_ENTRIES ); + 
ctx->txncache_entries_len_vinyl_ptr = NULL; + } else { + /* only the dcache of snapin_txn is of use here */ + fd_snapin_out_link_t snapin_txn = out1( topo, tile, "snapin_txn" ); + FD_TEST( !!snapin_txn.mem ); + ulong depth = topo->links[ tile->out_link_id[ snapin_txn.idx ] ].depth; + FD_TEST( ( depth*snapin_txn.mtu )==( sizeof(fd_sstxncache_entry_t)*(FD_SNAPIN_TXNCACHE_MAX_ENTRIES+1UL) ) ); + /* Although only the first 8 bytes are needed for txncache_entries_len, + it is easier to handle mtu in multiples of fd_sstxncache_entry_t. */ + fd_sstxncache_entry_t * txncache_base = fd_chunk_to_laddr( snapin_txn.mem, snapin_txn.chunk0 ); + ctx->txncache_entries_len_vinyl_ptr = (ulong*)txncache_base; + FD_TEST( sizeof(ulong)<=sizeof(fd_sstxncache_entry_t) ); + memset( ctx->txncache_entries_len_vinyl_ptr, 0, sizeof(ulong) ); + ctx->txncache_entries = txncache_base + 1UL; } ctx->full = 1; @@ -843,15 +824,17 @@ unprivileged_init( fd_topo_t * topo, ctx->gui_out = out1( topo, tile, "snapin_gui" ); ulong out_link_ct_idx = fd_topo_find_tile_out_link( topo, tile, "snapin_ct", 0UL ); if( out_link_ct_idx==ULONG_MAX ) out_link_ct_idx = fd_topo_find_tile_out_link( topo, tile, "snapin_ls", 0UL ); - if( FD_UNLIKELY( out_link_ct_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapin_ct` or `snapin_ls`" )); + if( out_link_ct_idx==ULONG_MAX ) out_link_ct_idx = fd_topo_find_tile_out_link( topo, tile, "snapin_wm", 0UL ); + if( FD_UNLIKELY( out_link_ct_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapin_ct` or `snapin_ls` or `snapin_wm`" )); fd_topo_link_t * snapin_out_link = &topo->links[ tile->out_link_id[ out_link_ct_idx ] ]; ctx->out_ct_idx = out_link_ct_idx; - if( FD_UNLIKELY( ctx->out_ct_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapin_ct` or `snapin_ls`" )); + if( FD_UNLIKELY( ctx->out_ct_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapin_ct` or `snapin_ls` or 
`snapin_wm`" )); if( FD_UNLIKELY( ctx->manifest_out.idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapin_manif`" )); - if( 0==strcmp( snapin_out_link->name, "snapin_ls" ) ) { - ctx->hash_out = out1( topo, tile, "snapin_ls" ); + if( ( 0==strcmp( snapin_out_link->name, "snapin_ls" ) ) || + ( 0==strcmp( snapin_out_link->name, "snapin_wm" ) ) ) { + ctx->hash_out = out1( topo, tile, snapin_out_link->name ); } fd_ssparse_reset( ctx->ssparse ); @@ -873,7 +856,8 @@ unprivileged_init( fd_topo_t * topo, fd_memset( &ctx->flags, 0, sizeof(ctx->flags) ); if( tile->snapin.use_vinyl ) { - fd_snapin_vinyl_unprivileged_init( ctx, topo, tile, _io_mm, _io_wd ); + ctx->use_vinyl = 1; + fd_snapin_vinyl_unprivileged_init( ctx, topo, tile, NULL, NULL ); } } diff --git a/src/discof/restore/fd_snapin_tile_private.h b/src/discof/restore/fd_snapin_tile_private.h index e3a535bc560..60357c117f4 100644 --- a/src/discof/restore/fd_snapin_tile_private.h +++ b/src/discof/restore/fd_snapin_tile_private.h @@ -80,7 +80,8 @@ struct fd_snapin_tile { ulong blockhash_offsets_len; blockhash_group_t * blockhash_offsets; - ulong txncache_entries_len; + ulong txncache_entries_len; + ulong * txncache_entries_len_vinyl_ptr; fd_sstxncache_entry_t * txncache_entries; fd_txncache_fork_id_t txncache_root_fork_id; @@ -104,22 +105,6 @@ struct fd_snapin_tile { fd_snapin_out_link_t gui_out; fd_snapin_out_link_t hash_out; - struct { - uchar * bstream_mem; - ulong bstream_sz; - - /* Vinyl in either io_wd or io_mm mode */ - fd_vinyl_io_t * io; - fd_vinyl_io_t * io_wd; - fd_vinyl_io_t * io_mm; - ulong io_seed; - - fd_vinyl_meta_t map[1]; - - ulong txn_seq; /* bstream seq of first txn record (in [seq_past,seq_present]) */ - uint txn_active : 1; - } vinyl; - struct { uchar * pair; ulong pair_sz; @@ -235,9 +220,9 @@ fd_snapin_vinyl_shutdown( fd_snapin_tile_t * ctx ); /* Internal APIs for inserting accounts */ -void fd_snapin_process_account_header_vinyl( fd_snapin_tile_t * ctx, 
fd_ssparse_advance_result_t * result ); -void fd_snapin_process_account_data_vinyl ( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ); -void fd_snapin_process_account_batch_vinyl ( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ); +int fd_snapin_process_account_header_vinyl( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ); +int fd_snapin_process_account_data_vinyl ( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ); +int fd_snapin_process_account_batch_vinyl ( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ); void fd_snapin_read_account_vinyl( fd_snapin_tile_t * ctx, @@ -260,7 +245,7 @@ static inline int fd_snapin_process_account_header( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ) { if( ctx->use_vinyl ) { - fd_snapin_process_account_header_vinyl( ctx, result ); + return fd_snapin_process_account_header_vinyl( ctx, result ); } else { return fd_snapin_process_account_header_funk( ctx, result ); } @@ -271,7 +256,7 @@ static inline int fd_snapin_process_account_data( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ) { if( ctx->use_vinyl ) { - fd_snapin_process_account_data_vinyl( ctx, result ); + return fd_snapin_process_account_data_vinyl( ctx, result ); } else { return fd_snapin_process_account_data_funk( ctx, result ); } @@ -283,7 +268,7 @@ fd_snapin_process_account_batch( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result, buffered_account_batch_t * buffered_batch ) { if( ctx->use_vinyl ) { - fd_snapin_process_account_batch_vinyl( ctx, result ); + return fd_snapin_process_account_batch_vinyl( ctx, result ); } else { return fd_snapin_process_account_batch_funk( ctx, result, buffered_batch ); } diff --git a/src/discof/restore/fd_snapin_tile_vinyl.c b/src/discof/restore/fd_snapin_tile_vinyl.c index efee0569ae7..9ff186bc9cb 100644 --- a/src/discof/restore/fd_snapin_tile_vinyl.c +++ b/src/discof/restore/fd_snapin_tile_vinyl.c @@ -1,115 +1,14 @@ #define 
_DEFAULT_SOURCE /* madvise */ #include "fd_snapin_tile_private.h" #include "utils/fd_ssparse.h" -#include "utils/fd_ssctrl.h" #include "utils/fd_vinyl_io_wd.h" -#include -#include /* open */ -#include /* mmap, madvise */ -#include /* fstat */ -#include /* close */ - -#include "generated/fd_snapin_tile_vinyl_seccomp.h" - -/**********************************************************************\ - - Vinyl 101: - - Vinyl is Firedancer's main account database - - Vinyl is comprised of several components on-disk and in-memory - - vinyl_bstream is a single file containing all vinyl records - - vinyl_bstream is the source of truth - - vinyl_meta indexes the latest revisions of all elements in - vinyl_bstream - - Vinyl has an in-memory caching layer, but snapin does not use it - - The snapshot loader must: - - Load the most recent version of each account into bstream - - Create a full vinyl_meta index of accounts - - Recover from load failures and retry - - Note on I/O layers: - - io_mm is the slow/generic memory mapped I/O backend. - - io_wd is the fast/dumb O_DIRECT backend. Can only append, thus used - for hot path account writing. - - io_mm and io_wd cannot be active at the same time -- snapin will - switch between them as necessary. 
- - Full snapshot logic: - - Write accounts to bstream (io_wd) - - Synchronously populate the vinyl_meta index while writing - - On load failure, destroy and recreate the bstream (io_mm) - - Incremental snapshot logic: - - Phase 1: while reading the incremental snapshot - - Write accounts to bstream without updating the index (io_wd) - - On load failure, undo writes done to bstream (io_mm) - - Phase 2: once read is done - - Replay all elements written to bstream (io_mm) - - Populate the vinyl_meta index while replaying - -\**********************************************************************/ - void fd_snapin_vinyl_privileged_init( fd_snapin_tile_t * ctx, fd_topo_t * topo, fd_topo_tile_t * tile ) { - void * shmap = fd_topo_obj_laddr( topo, tile->snapin.vinyl_meta_map_obj_id ); - void * shele = fd_topo_obj_laddr( topo, tile->snapin.vinyl_meta_pool_obj_id ); - - FD_TEST( fd_vinyl_meta_join( ctx->vinyl.map, shmap, shele ) ); - - /* Set up io_mm dependencies */ - - char const * bstream_path = tile->snapin.vinyl_path; - int bstream_fd = open( bstream_path, O_RDWR|O_CLOEXEC, 0644 ); - if( FD_UNLIKELY( bstream_fd<0 ) ) { - FD_LOG_ERR(( "open(%s,O_RDWR|O_CLOEXEC,0644) failed (%i-%s)", - bstream_path, errno, fd_io_strerror( errno ) )); - } - - struct stat st; - if( FD_UNLIKELY( fstat( bstream_fd, &st )!=0 ) ) { - FD_LOG_ERR(( "fstat(%s) failed (%i-%s)", - bstream_path, errno, fd_io_strerror( errno ) )); - } - ulong bstream_sz = (ulong)st.st_size; - if( FD_UNLIKELY( !fd_ulong_is_aligned( bstream_sz, FD_VINYL_BSTREAM_BLOCK_SZ ) ) ) { - FD_LOG_ERR(( "vinyl file %s has misaligned size (%lu bytes)", bstream_path, bstream_sz )); - } - - void * bstream_mem = mmap( NULL, bstream_sz, PROT_READ|PROT_WRITE, MAP_SHARED, bstream_fd, 0 ); - if( FD_UNLIKELY( bstream_mem==MAP_FAILED ) ) { - FD_LOG_ERR(( "mmap(sz=%lu,PROT_READ|PROT_WRITE,MAP_SHARED,path=%s,off=0) failed (%i-%s)", - bstream_sz, bstream_path, errno, fd_io_strerror( errno ) )); - } - - if( FD_UNLIKELY( 0!=close( bstream_fd ) 
) ) { /* clean up unused fd */ - FD_LOG_ERR(( "close(fd=%i) failed (%i-%s)", - bstream_fd, errno, fd_io_strerror( errno ) )); - } - - ctx->vinyl.bstream_mem = bstream_mem; - ctx->vinyl.bstream_sz = bstream_sz; - - FD_TEST( fd_rng_secure( &ctx->vinyl.io_seed, 8UL ) ); -} - -static void -io_mm_align_4k( fd_snapin_tile_t * ctx ) { - fd_vinyl_io_t * io_mm = ctx->vinyl.io_mm; - if( FD_UNLIKELY( io_mm->seq_future!=0UL ) ) { - FD_LOG_CRIT(( "unexpected io_mm state (seq_future=%lu)", io_mm->seq_future )); - } - uchar * mmio = fd_vinyl_mmio ( io_mm ); - ulong mmio_sz = fd_vinyl_mmio_sz( io_mm ); - - ulong bstream_preamble = fd_ulong_align_up( FD_VINYL_BSTREAM_BLOCK_SZ, 4096UL ) - FD_VINYL_BSTREAM_BLOCK_SZ; - FD_CRIT( bstream_preamble<=mmio_sz, "bstream too small for 4k alignment" ); - - fd_memset( mmio, 0, bstream_preamble ); - io_mm->seq_present += bstream_preamble; - io_mm->seq_future += bstream_preamble; + /* Nothing to do */ + (void)ctx; (void)topo; (void)tile; } void @@ -118,311 +17,21 @@ fd_snapin_vinyl_unprivileged_init( fd_snapin_tile_t * ctx, fd_topo_tile_t * tile, void * io_mm_mem, void * io_wd_mem ) { - - /* Set up io_mm */ - - ctx->vinyl.io_mm = - fd_vinyl_io_mm_init( io_mm_mem, - FD_SNAPIN_IO_SPAD_MAX, - ctx->vinyl.bstream_mem, - ctx->vinyl.bstream_sz, - 1, - "accounts-v0", 12UL, - ctx->vinyl.io_seed ); - if( FD_UNLIKELY( !ctx->vinyl.io_mm ) ) { - FD_LOG_ERR(( "fd_vinyl_io_mm_init failed" )); - } - - /* Write out zero blocks to align the bstream by 4096 bytes - (Assuming a 512 byte sync block) */ - - io_mm_align_4k( ctx ); - - /* Set up io_wd dependencies */ - - ulong wr_link_id = fd_topo_find_tile_out_link( topo, tile, "snapin_wh", 0UL ); - if( FD_UNLIKELY( wr_link_id==ULONG_MAX ) ) FD_LOG_CRIT(( "snapin_wh link not found" )); - fd_topo_link_t * wr_link = &topo->links[ tile->out_link_id[ wr_link_id ] ]; - - if( FD_UNLIKELY( tile->snapin.snapwr_depth != fd_mcache_depth( wr_link->mcache ) ) ) { - /* FIXME TOCTOU issue ... 
A malicious downstream tile could - theoretically corrupt mcache->depth and cause an OOB access - while snapin is still initializing. Practically not an - issue because the system is not exposed to attacker- - controlled input at boot time. */ - FD_LOG_CRIT(( "snapin_wr link mcache depth %lu does not match snapwr_depth %lu", - fd_mcache_depth( wr_link->mcache ), tile->snapin.snapwr_depth )); - } - - if( FD_UNLIKELY( fd_topo_link_reliable_consumer_cnt( topo, wr_link )!=1UL ) ) { - FD_LOG_CRIT(( "snapin_wr link must have exactly one reliable consumer" )); - } - - ulong wh_tile_id = fd_topo_find_tile( topo, "snapwh", 0UL ); - FD_TEST( wh_tile_id!=ULONG_MAX ); - fd_topo_tile_t * wh_tile = &topo->tiles[ wh_tile_id ]; - FD_TEST( wh_tile->in_cnt==1 ); - FD_TEST( wh_tile->in_link_id[0] == wr_link->id ); - FD_CRIT( 0==strcmp( topo->links[ wh_tile->in_link_id[ 0 ] ].name, "snapin_wh" ), "unexpected link found" ); - ulong const * wh_fseq = wh_tile->in_link_fseq[ 0 ]; - if( FD_UNLIKELY( !wh_fseq ) ) { - FD_LOG_CRIT(( "snapin_wr link reliable consumer fseq not found" )); - } - - /* Set up io_wd */ - - ctx->vinyl.io_wd = - fd_vinyl_io_wd_init( io_wd_mem, - ctx->vinyl.bstream_sz, - ctx->vinyl.io_mm->seed, - wr_link->mcache, - wr_link->dcache, - wh_fseq, - wr_link->mtu ); - if( FD_UNLIKELY( !ctx->vinyl.io_wd ) ) { - FD_LOG_ERR(( "fd_vinyl_io_wd_init failed" )); - } - - /* Start by using io_mm */ - - ctx->vinyl.io = ctx->vinyl.io_mm; -} - -ulong -fd_snapin_vinyl_seccomp( ulong out_cnt, - struct sock_filter * out ) { - populate_sock_filter_policy_fd_snapin_tile_vinyl( out_cnt, out, (uint)fd_log_private_logfile_fd() ); - return sock_filter_policy_fd_snapin_tile_vinyl_instr_cnt; -} - -static void -vinyl_mm_sync( fd_snapin_tile_t * ctx ) { - if( FD_UNLIKELY( 0!=msync( ctx->vinyl.bstream_mem, ctx->vinyl.bstream_sz, MS_SYNC ) ) ) { - FD_LOG_ERR(( "msync(addr=%p,sz=%lu,MS_SYNC) failed (%i-%s)", - (void *)ctx->vinyl.bstream_mem, ctx->vinyl.bstream_sz, - errno, fd_io_strerror( errno ) )); - 
} -} - -/* Faster vinyl meta accesses *****************************************/ - -static fd_vinyl_meta_ele_t * -fd_vinyl_meta_prepare_nolock( fd_vinyl_meta_t * join, - fd_vinyl_key_t const * key, - ulong memo ) { - fd_vinyl_meta_ele_t * ele0 = join->ele; - ulong ele_max = join->ele_max; - ulong probe_max = join->probe_max; - void * ctx = join->ctx; - - ulong start_idx = memo & (ele_max-1UL); - - for(;;) { - - ulong ele_idx = start_idx; - - for( ulong probe_rem=probe_max; probe_rem; probe_rem-- ) { - fd_vinyl_meta_ele_t * ele = ele0 + ele_idx; - - if( FD_LIKELY( fd_vinyl_meta_private_ele_is_free( ctx, ele ) ) || /* opt for low collision */ - ( - FD_LIKELY( ele->memo==memo ) && - FD_LIKELY( fd_vinyl_key_eq( &ele->phdr.key, key ) ) /* opt for already in map */ - ) ) { - return ele; - } - - ele_idx = (ele_idx+1UL) & (ele_max-1UL); - } - - return NULL; - - } - - /* never get here */ -} - -/* Transactional APIs *************************************************/ - -void -fd_snapin_vinyl_txn_begin( fd_snapin_tile_t * ctx ) { - FD_CRIT( !ctx->vinyl.txn_active, "txn_begin called while already in txn" ); - FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); - fd_vinyl_io_t * io = ctx->vinyl.io_mm; - - /* Finish any outstanding writes */ - int commit_err = fd_vinyl_io_commit( io, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); - - ctx->vinyl.txn_seq = io->seq_present; - ctx->vinyl.txn_active = 1; -} - -void -fd_snapin_vinyl_txn_commit( fd_snapin_tile_t * ctx ) { - FD_CRIT( ctx->vinyl.txn_active, "txn_commit called while not in txn" ); - FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); - fd_vinyl_io_t * io = ctx->vinyl.io_mm; - - long dt = -fd_log_wallclock(); - - /* Finish any outstanding writes */ - - int commit_err = fd_vinyl_io_commit( io, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( 
"fd_vinyl_io_commit failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); - - /* Hint to kernel to start prefetching to speed up reads */ - - uchar * mmio = fd_vinyl_mmio ( io ); FD_TEST( mmio ); - ulong mmio_sz = fd_vinyl_mmio_sz( io ); - - ulong txn_seq0 = ctx->vinyl.txn_seq; - ulong txn_seq1 = ctx->vinyl.io_mm->seq_present; - FD_LOG_INFO(( "vinyl txn_commit starting for seq [%lu,%lu)", txn_seq0, txn_seq1 )); - ulong txn_sz = txn_seq1-txn_seq0; - FD_CRIT( fd_vinyl_seq_le( txn_seq0, txn_seq1 ), "invalid txn seq range" ); - FD_CRIT( txn_seq1 <= mmio_sz, "invalid txn seq range" ); - if( FD_UNLIKELY( fd_vinyl_seq_eq( txn_seq0, txn_seq1 ) ) ) return; - - void * madv_base = (void *)fd_ulong_align_dn( (ulong)mmio+txn_seq0, FD_SHMEM_NORMAL_PAGE_SZ ); - ulong madv_sz = /* */fd_ulong_align_up( txn_sz, FD_SHMEM_NORMAL_PAGE_SZ ); - if( FD_UNLIKELY( madvise( madv_base, madv_sz, MADV_SEQUENTIAL ) ) ) { - FD_LOG_WARNING(( "madvise(addr=%p,sz=%lu,MADV_SEQUENTIAL) failed (%i-%s)", - madv_base, madv_sz, - errno, fd_io_strerror( errno ) )); - } - - /* Replay incremental account updates */ - - fd_vinyl_meta_t * meta_map = ctx->vinyl.map; - for( ulong seq=txn_seq0; fd_vinyl_seq_lt( seq, txn_seq1 ); ) { - fd_vinyl_bstream_block_t * block = (void *)( mmio+seq ); - - /* Speculatively read block info */ - ulong ctl = FD_VOLATILE_CONST( block->ctl ); - fd_vinyl_bstream_phdr_t phdr = FD_VOLATILE_CONST( block->phdr ); - - ulong val_esz = fd_vinyl_bstream_ctl_sz ( ctl ); - int block_type = fd_vinyl_bstream_ctl_type( ctl ); - ulong block_sz; - - if( FD_LIKELY( block_type==FD_VINYL_BSTREAM_CTL_TYPE_PAIR ) ) { - block_sz = fd_vinyl_bstream_pair_sz( val_esz ); - ulong memo = fd_vinyl_key_memo( meta_map->seed, &phdr.key ); - fd_vinyl_meta_ele_t * ele = fd_vinyl_meta_prepare_nolock( meta_map, &phdr.key, memo ); - if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "fd_vinyl_meta_prepare failed (full)" )); - - /* Erase value if existing is newer */ - if( FD_UNLIKELY( fd_vinyl_meta_ele_in_use( ele ) 
) ) { /* key exists */ - ulong exist_slot = ele->phdr.info.ul[ 1 ]; - ulong cur_slot = phdr.info.ul[ 1 ]; - if( exist_slot > cur_slot ) { - fd_memset( block, 0, block_sz ); - goto next; - } - } - - /* Overwrite map entry */ - ele->memo = memo; - ele->phdr = phdr; - ele->seq = seq; - ele->line_idx = ULONG_MAX; - } else if( block_type==FD_VINYL_BSTREAM_CTL_TYPE_ZPAD ) { - block_sz = FD_VINYL_BSTREAM_BLOCK_SZ; - } else { - FD_LOG_CRIT(( "unexpected block type %d", block_type )); - } - - - if( FD_UNLIKELY( !block_sz ) ) { - FD_LOG_CRIT(( "Invalid block header at vinyl seq %lu, ctl=%016lx (zero block_sz)", seq, ctl )); - } - if( FD_UNLIKELY( block_sz > 64UL<<20 ) ) { - FD_LOG_CRIT(( "Invalid block header at vinyl seq %lu, ctl=%016lx, block_sz=%lu (unreasonably large block size)", seq, ctl, block_sz )); - } - -next: - seq += block_sz; - } - - /* Persist above erases to disk */ - - int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); - vinyl_mm_sync( ctx ); - - dt += fd_log_wallclock(); - FD_LOG_INFO(( "vinyl txn_commit took %g seconds", (double)dt/1e9 )); -} - -void -fd_snapin_vinyl_txn_cancel( fd_snapin_tile_t * ctx ) { - FD_CRIT( ctx->vinyl.txn_active, "txn_cancel called while not in txn" ); - FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); - - fd_vinyl_io_t * io = ctx->vinyl.io_mm; - fd_vinyl_io_rewind( io, ctx->vinyl.txn_seq ); - fd_vinyl_io_sync ( io, FD_VINYL_IO_FLAG_BLOCKING ); + /* Nothing to do */ + (void)ctx; (void)topo; (void)tile; (void)io_mm_mem; (void)io_wd_mem; } -/* Fast writer ********************************************************/ - -void -fd_snapin_vinyl_wd_init( fd_snapin_tile_t * ctx ) { - FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); - - int commit_err = fd_vinyl_io_commit( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( 
commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io_mm) failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); - - /* Flush io_mm */ - - int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); - vinyl_mm_sync( ctx ); - - /* Synchronize sequence numbers */ - - ctx->vinyl.io_wd->seq_ancient = ctx->vinyl.io_mm->seq_ancient; - ctx->vinyl.io_wd->seq_past = ctx->vinyl.io_mm->seq_past; - ctx->vinyl.io_wd->seq_present = ctx->vinyl.io_mm->seq_present; - ctx->vinyl.io_wd->seq_future = ctx->vinyl.io_mm->seq_future; - ctx->vinyl.io_wd->spad_used = 0UL; - - ctx->vinyl.io = ctx->vinyl.io_wd; -} - -void -fd_snapin_vinyl_wd_fini( fd_snapin_tile_t * ctx ) { - if( FD_UNLIKELY( ctx->vinyl.io!=ctx->vinyl.io_wd ) ) return; - - int commit_err = fd_vinyl_io_commit( ctx->vinyl.io_wd, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io_wd) failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); - - /* Synchronize sequence numbers */ +/* bstream_push_account finishes processing a single account (pair). + A single fd_stem_publish is issued, and the chunk always advances + by mtu size. */ - ctx->vinyl.io_mm->seq_ancient = ctx->vinyl.io_wd->seq_ancient; - ctx->vinyl.io_mm->seq_past = ctx->vinyl.io_wd->seq_past; - ctx->vinyl.io_mm->seq_present = ctx->vinyl.io_wd->seq_present; - ctx->vinyl.io_mm->seq_future = ctx->vinyl.io_wd->seq_future; - ctx->vinyl.io_mm->spad_used = 0UL; - - ctx->vinyl.io = ctx->vinyl.io_mm; -} - -/* bstream_push_account writes a single account out to bstream. 
*/ - -static void +static inline void bstream_push_account( fd_snapin_tile_t * ctx ) { FD_CRIT( !ctx->vinyl_op.data_rem, "incomplete account store" ); FD_CRIT( ctx->vinyl_op.pair, "no store in progres" ); - fd_vinyl_io_t * io = ctx->vinyl.io; - - uchar * pair = ctx->vinyl_op.pair; - ulong pair_sz = ctx->vinyl_op.pair_sz; - - ulong seq_after = fd_vinyl_io_append( io, pair, pair_sz ); - if( ctx->full ) ctx->vinyl_op.meta_ele->seq = seq_after; + fd_stem_publish( ctx->stem, ctx->hash_out.idx, FD_SNAPSHOT_MSG_DATA/*sig*/, ctx->hash_out.chunk, 1UL/*sz=acc_cnt*/, 0UL, 0UL/*tsorig*/, 0UL/*tspub*/ ); + ctx->hash_out.chunk = fd_dcache_compact_next( ctx->hash_out.chunk, ctx->hash_out.mtu, ctx->hash_out.chunk0, ctx->hash_out.wmark ); ctx->vinyl_op.pair = NULL; ctx->vinyl_op.pair_sz = 0UL; @@ -433,35 +42,21 @@ bstream_push_account( fd_snapin_tile_t * ctx ) { ctx->metrics.accounts_inserted++; } -/* bstream_alloc is a faster version of fd_vinyl_io_alloc. Indirect - calls have significant overhead on Zen 5. */ - -static uchar * -bstream_alloc( fd_vinyl_io_t * io, - ulong sz, - int flags ) { - if( FD_LIKELY( io->impl==&fd_vinyl_io_wd_impl ) ) - return fd_vinyl_io_wd_alloc( io, sz, flags ); - return fd_vinyl_io_alloc( io, sz, flags ); -} - -/* fd_snapin_process_account_header_vinyl prepares a bstream write for - one account (slow) */ +/* fd_snapin_process_account_header_vinyl starts processing a + (possibly fragmented) account (slow). 
*/ -void +int fd_snapin_process_account_header_vinyl( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ) { FD_CRIT( !ctx->vinyl_op.dst_rem, "incomplete account store" ); FD_CRIT( !ctx->vinyl_op.pair, "incomplete account store" ); - fd_vinyl_io_t * io = ctx->vinyl.io; - fd_vinyl_meta_t * map = ctx->vinyl.map; - ulong val_sz = sizeof(fd_account_meta_t) + result->account_header.data_len; FD_CRIT( val_sz<=FD_VINYL_VAL_MAX, "corruption detected" ); ulong pair_sz = fd_vinyl_bstream_pair_sz( val_sz ); - uchar * pair = bstream_alloc( io, pair_sz, FD_VINYL_IO_FLAG_BLOCKING ); + FD_TEST( pair_sz<=ctx->hash_out.mtu ); + uchar * pair = fd_chunk_to_laddr( ctx->hash_out.mem, ctx->hash_out.chunk ); uchar * dst = pair; ulong dst_rem = pair_sz; @@ -490,28 +85,6 @@ fd_snapin_process_account_header_vinyl( fd_snapin_tile_t * ctx, dst_rem -= sizeof(fd_account_meta_t); FD_CRIT( dst_rem >= result->account_header.data_len, "corruption detected" ); - if( ctx->full ) { /* update index immediately */ - ulong memo = fd_vinyl_key_memo( map->seed, &phdr->key ); - fd_vinyl_meta_ele_t * ele = fd_vinyl_meta_prepare_nolock( map, &phdr->key, memo ); - if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "Failed to update vinyl index (full)" )); - - if( FD_UNLIKELY( fd_vinyl_meta_ele_in_use( ele ) ) ) { - /* Drop current value if existing is newer */ - ulong exist_slot = ele->phdr.info.ul[ 1 ]; - if( exist_slot > result->account_header.slot ) { - ctx->vinyl_op.pair = NULL; - return; - } - } - - ele->memo = memo; - ele->phdr.ctl = phdr->ctl; - ele->phdr.key = phdr->key; - ele->phdr.info = phdr->info; - ele->seq = ULONG_MAX; /* later init */ - ele->line_idx = ULONG_MAX; - ctx->vinyl_op.meta_ele = ele; - } ctx->vinyl_op.pair = pair; ctx->vinyl_op.pair_sz = pair_sz; @@ -521,15 +94,18 @@ fd_snapin_process_account_header_vinyl( fd_snapin_tile_t * ctx, if( !ctx->vinyl_op.data_rem ) { bstream_push_account( ctx ); + return 1; } + return 0; } -/* fd_snapin_process_account_data_vinyl continues a bstream write 
(slow) */ +/* fd_snapin_process_account_data_vinyl continues processing a + fragmented account (slow). */ -void +int fd_snapin_process_account_data_vinyl( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ) { - if( FD_UNLIKELY( !ctx->vinyl_op.pair ) ) return; /* ignored account */ + if( FD_UNLIKELY( !ctx->vinyl_op.pair ) ) return 0; /* ignored account */ ulong chunk_sz = result->account_data.data_sz; if( FD_LIKELY( chunk_sz ) ) { @@ -542,106 +118,47 @@ fd_snapin_process_account_data_vinyl( fd_snapin_tile_t * ctx, } if( !ctx->vinyl_op.data_rem ) { /* finish store */ bstream_push_account( ctx ); + return 1; } + return 0; } -/* fd_snapin_process_account_batch_vinyl inserts a batch of unfragmented - accounts (fast path). - - The main optimization implemented is prefetching hash map accesses to - amortize DRAM latency. */ +/* fd_snapin_process_account_batch_vinyl processes a batch of unfragmented + accounts (fast path), converting them into vinyl bstream pairs. + A single fd_stem_publish is issued for the complete batch, and the + chunk always advances by mtu size. 
*/ -void +int fd_snapin_process_account_batch_vinyl( fd_snapin_tile_t * ctx, fd_ssparse_advance_result_t * result ) { - fd_vinyl_meta_t * const map = ctx->vinyl.map; - fd_vinyl_meta_ele_t * const ele0 = ctx->vinyl.map->ele; - /* Derive map slot heads */ + uchar * pair = fd_chunk_to_laddr( ctx->hash_out.mem, ctx->hash_out.chunk ); - ulong memo[ FD_SSPARSE_ACC_BATCH_MAX ]; - ulong const slot_mask = map->ele_max-1UL; for( ulong i=0UL; iaccount_batch.batch[ i ]; - uchar const * pubkey = frame+0x10UL; - fd_vinyl_key_t key[1]; fd_vinyl_key_init( key, pubkey, 32UL ); - memo[ i ] = fd_vinyl_key_memo( map->seed, key ); - } - - /* Prefetch slots */ - - for( ulong i=0UL; iaccount_batch.batch[ i ]; - ulong const data_len = fd_ulong_load_8_fast( frame+0x08UL ); - uchar const * pubkey = frame+0x10UL; - fd_vinyl_key_t key[1]; fd_vinyl_key_init( key, pubkey, 32UL ); - - fd_vinyl_meta_ele_t * ele = fd_vinyl_meta_prepare_nolock( map, key, memo[ i ] ); - if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "Failed to update vinyl index (full)" )); - batch_ele[ i ] = ele; - - if( FD_UNLIKELY( fd_vinyl_meta_ele_in_use( ele ) ) ) { /* key exists */ - /* Drop current value if existing is newer */ - ulong exist_slot = ele->phdr.info.ul[ 1 ]; - if( exist_slot > result->account_batch.slot ) { - batch_ele[ i ] = NULL; - continue; - } - } - - ulong val_sz = sizeof(fd_account_meta_t) + data_len; - FD_CRIT( val_sz<=FD_VINYL_VAL_MAX, "corruption detected" ); - - fd_vinyl_bstream_phdr_t * phdr = &ele->phdr; - phdr->ctl = fd_vinyl_bstream_ctl( FD_VINYL_BSTREAM_CTL_TYPE_PAIR, FD_VINYL_BSTREAM_CTL_STYLE_RAW, val_sz ); - phdr->key = *key; - phdr->info.val_sz = (uint)val_sz; - phdr->info.ul[1] = result->account_batch.slot; - - ele->memo = memo[ i ]; - ele->phdr.ctl = phdr->ctl; - ele->phdr.key = *key; - ele->phdr.info = phdr->info; - ele->seq = ULONG_MAX; /* later init */ - ele->line_idx = ULONG_MAX; - } - - /* Write out to bstream */ - - fd_vinyl_io_t * io = ctx->vinyl.io; - for( ulong i=0UL; 
iaccount_batch.batch[ i ]; ulong const data_len = fd_ulong_load_8_fast( frame+0x08UL ); + uchar const * pubkey = frame+0x10UL; + fd_vinyl_key_t key[1]; fd_vinyl_key_init( key, pubkey, 32UL ); ulong lamports = fd_ulong_load_8_fast( frame+0x30UL ); - uchar owner[32]; memcpy( owner, frame+0x40UL, 32UL ); + uchar owner[32]; memcpy( owner, frame+0x40UL, 32UL ); _Bool executable = !!frame[ 0x60UL ]; ulong val_sz = sizeof(fd_account_meta_t) + data_len; FD_CRIT( val_sz<=FD_VINYL_VAL_MAX, "corruption detected" ); ulong pair_sz = fd_vinyl_bstream_pair_sz( val_sz ); - uchar * pair = fd_vinyl_io_wd_alloc_fast( io, pair_sz ); - if( FD_UNLIKELY( !pair ) ) { - pair = fd_vinyl_io_wd_alloc( io, pair_sz, FD_VINYL_IO_FLAG_BLOCKING ); - } + FD_TEST( pair_sz<=ctx->hash_out.mtu ); uchar * dst = pair; ulong dst_rem = pair_sz; FD_CRIT( dst_rem >= sizeof(fd_vinyl_bstream_phdr_t), "corruption detected" ); fd_vinyl_bstream_phdr_t * phdr = (fd_vinyl_bstream_phdr_t *)dst; - *phdr = ele->phdr; + phdr->ctl = fd_vinyl_bstream_ctl( FD_VINYL_BSTREAM_CTL_TYPE_PAIR, FD_VINYL_BSTREAM_CTL_STYLE_RAW, val_sz ); + phdr->key = *key; + phdr->info.val_sz = (uint)val_sz; + phdr->info.ul[1] = result->account_batch.slot; dst += sizeof(fd_vinyl_bstream_phdr_t); dst_rem -= sizeof(fd_vinyl_bstream_phdr_t); @@ -664,22 +181,18 @@ fd_snapin_process_account_batch_vinyl( fd_snapin_tile_t * ctx, dst += data_len; dst_rem -= data_len; - ulong seq_after = fd_vinyl_io_append( io, pair, pair_sz ); - ele->seq = seq_after; + pair += pair_sz; ctx->metrics.accounts_inserted++; } + fd_stem_publish( ctx->stem, ctx->hash_out.idx, FD_SNAPSHOT_MSG_DATA/*sig*/, ctx->hash_out.chunk, FD_SSPARSE_ACC_BATCH_MAX/*sz=acc_cnt*/, 0UL, 0UL/*tsorig*/, 0UL/*tspub*/ ); + ctx->hash_out.chunk = fd_dcache_compact_next( ctx->hash_out.chunk, ctx->hash_out.mtu, ctx->hash_out.chunk0, ctx->hash_out.wmark ); + return 1; } void fd_snapin_vinyl_shutdown( fd_snapin_tile_t * ctx ) { - int commit_err = fd_vinyl_io_commit( ctx->vinyl.io, 
FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io) failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); - int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); - if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); - vinyl_mm_sync( ctx ); - - fd_vinyl_io_wd_ctrl( ctx->vinyl.io_wd, FD_SNAPSHOT_MSG_CTRL_SHUTDOWN, 0UL ); + (void)ctx; } void @@ -688,71 +201,10 @@ fd_snapin_read_account_vinyl( fd_snapin_tile_t * ctx, fd_account_meta_t * meta, uchar * data, ulong data_max ) { - if( FD_UNLIKELY( ctx->vinyl.io!=ctx->vinyl.io_mm ) ) { - FD_LOG_CRIT(( "vinyl not in io_mm mode" )); - } - - memset( meta, 0, sizeof(fd_account_meta_t) ); - - /* Query database index */ - - fd_vinyl_key_t key[1]; - fd_vinyl_key_init( key, acct_addr, 32UL ); - ulong memo = fd_vinyl_key_memo( ctx->vinyl.map->seed, key ); - fd_vinyl_meta_ele_t const * ele = fd_vinyl_meta_prepare_nolock( ctx->vinyl.map, key, memo ); - if( FD_UNLIKELY( !ele || !fd_vinyl_meta_ele_in_use( ele ) ) ) { - /* account not found */ - return; - } - - uchar * mmio = fd_vinyl_mmio ( ctx->vinyl.io_mm ); - ulong mmio_sz = fd_vinyl_mmio_sz( ctx->vinyl.io_mm ); - - /* Validate index record */ - - ulong const seq0 = ele->seq; - ulong const ctl = ele->phdr.ctl; - int const ctl_type = fd_vinyl_bstream_ctl_type( ctl ); - ulong const val_esz = fd_vinyl_bstream_ctl_sz ( ctl ); - ulong const pair_sz = fd_vinyl_bstream_pair_sz( val_esz ); - ulong const seq1 = seq0 + pair_sz; - ulong const seq_past = ctx->vinyl.io->seq_past; - ulong const seq_present = ctx->vinyl.io->seq_present; - if( FD_UNLIKELY( ctl_type!=FD_VINYL_BSTREAM_CTL_TYPE_PAIR ) ) { - FD_LOG_CRIT(( "corrupt bstream record in index: ctl=%016lx", ctl )); - } - if( FD_UNLIKELY( val_eszsizeof(fd_account_meta_t)+FD_RUNTIME_ACC_SZ_MAX ) ) { - FD_LOG_CRIT(( "corrupt bstream record in index: val_esz=%lu", val_esz )); - } - int 
bad_past = !(fd_vinyl_seq_le( seq_past, seq0 ) & fd_vinyl_seq_lt( seq0, seq1 ) & fd_vinyl_seq_le( seq1, seq_present )); - if( FD_UNLIKELY( bad_past ) ) { - FD_LOG_CRIT(( "corrupt bstream record in index: seq[%lu,%lu) not in [seq_past=%lu,seq_present=%lu)", - seq0, seq1, seq_past, seq_present )); - } - - /* Map seq range to underlying device - In the snapshot loader, it is safe to assume that bstream reads - do not wrap around. */ - - if( FD_UNLIKELY( seq1>mmio_sz ) ) { - FD_LOG_CRIT(( "corrupt bstream record in index: seq[%lu,%lu) exceeds bstream addressable range [0,%lu)", - seq0, seq1, mmio_sz )); - } - - /* Read from bstream */ - - ulong seq_meta = seq0 + sizeof(fd_vinyl_bstream_phdr_t); - ulong seq_data = seq_meta + sizeof(fd_account_meta_t); - - memcpy( meta, mmio+seq_meta, sizeof(fd_account_meta_t) ); - if( FD_UNLIKELY( sizeof(fd_account_meta_t)+(ulong)meta->dlen > val_esz ) ) { - FD_LOG_CRIT(( "corrupt bstream record: seq0=%lu val_esz=%lu dlen=%u", seq0, val_esz, meta->dlen )); - } - if( FD_UNLIKELY( meta->dlen > data_max ) ) { - FD_BASE58_ENCODE_32_BYTES( acct_addr, acct_addr_b58 ); - FD_LOG_WARNING(( "failed to read account %s: account data size (%lu bytes) exceeds buffer size (%lu bytes)", - acct_addr_b58, (ulong)meta->dlen, data_max )); - } - memcpy( data, mmio+seq_data, meta->dlen ); + (void)ctx; + (void)acct_addr; + (void)meta; + (void)data; + (void)data_max; + FD_LOG_ERR(( "fd_snapin_read_account_vinyl is not supported here" )); } diff --git a/src/discof/restore/fd_snapwh_tile.c b/src/discof/restore/fd_snapwh_tile.c index 282120cfc33..7c5b96dd932 100644 --- a/src/discof/restore/fd_snapwh_tile.c +++ b/src/discof/restore/fd_snapwh_tile.c @@ -12,6 +12,8 @@ #define NAME "snapwh" +#define FD_SNAPWH_WR_FSEQ_CNT_MAX (16UL) + struct fd_snapwh { /* Run loop */ uint state; @@ -25,7 +27,8 @@ struct fd_snapwh { /* ACKs / flow control */ ulong * up_fseq; - ulong const * wr_fseq; + ulong const * wr_fseq[FD_SNAPWH_WR_FSEQ_CNT_MAX]; + ulong wr_fseq_cnt; ulong 
last_fseq; ulong next_seq; @@ -66,22 +69,22 @@ unprivileged_init( fd_topo_t * topo, FD_CRIT( fd_dcache_app_sz( in_link->dcache )>=sizeof(ulong), "in_link dcache app region too small to hold io_seed" ); snapwh->io_seed = (ulong const *)fd_dcache_app_laddr_const( in_link->dcache ); + ulong wr_fseq_cnt_exp = fd_topo_tile_name_cnt( topo, "snapwr" ); + FD_TEST( wr_fseq_cnt_exp<=FD_SNAPWH_WR_FSEQ_CNT_MAX ); + ulong wr_fseq_cnt = 0UL; fd_topo_link_t const * out_link = &topo->links[ tile->out_link_id[ 0 ] ]; - FD_TEST( fd_topo_link_reliable_consumer_cnt( topo, out_link )==1UL ); + FD_TEST( fd_topo_link_reliable_consumer_cnt( topo, out_link )==wr_fseq_cnt_exp ); for( ulong tile_idx=0UL; tile_idxtile_cnt; tile_idx++ ) { fd_topo_tile_t const * consumer_tile = &topo->tiles[ tile_idx ]; for( ulong in_idx=0UL; in_idxin_cnt; in_idx++ ) { if( consumer_tile->in_link_id[ in_idx ]==out_link->id ) { - snapwh->wr_fseq = consumer_tile->in_link_fseq[ in_idx ]; - break; + snapwh->wr_fseq[ wr_fseq_cnt ] = consumer_tile->in_link_fseq[ in_idx ]; + wr_fseq_cnt++; } } - if( snapwh->wr_fseq ) break; - } - if( FD_UNLIKELY( !snapwh->wr_fseq ) ) { - FD_LOG_ERR(( "unable to find fseq for output link %s:%lu", - out_link->name, out_link->kind_id )); } + snapwh->wr_fseq_cnt = wr_fseq_cnt; + FD_TEST( snapwh->wr_fseq_cnt==wr_fseq_cnt_exp ); snapwh->state = FD_SNAPSHOT_STATE_IDLE; snapwh->last_fseq = fd_fseq_query( snapwh->up_fseq ); @@ -131,10 +134,14 @@ before_credit( fd_snapwh_t * ctx, } /* Reverse path bubble up flow control credits received from snapwr */ - ulong wr_seq = fd_fseq_query( ctx->wr_fseq ); - if( FD_UNLIKELY( wr_seq!=ctx->last_fseq ) ) { - fd_fseq_update( ctx->up_fseq, wr_seq ); - ctx->last_fseq = wr_seq; + ulong wr_seq_min = ULONG_MAX; + for( ulong i=0; iwr_fseq_cnt; i++ ){ + ulong wr_seq = fd_fseq_query( ctx->wr_fseq[ i ] ); + wr_seq_min = fd_ulong_min( wr_seq_min, wr_seq ); + } + if( FD_UNLIKELY( wr_seq_min!=ctx->last_fseq ) ) { + fd_fseq_update( ctx->up_fseq, wr_seq_min ); + 
ctx->last_fseq = wr_seq_min; } } diff --git a/src/discof/restore/fd_snapwm_tile.c b/src/discof/restore/fd_snapwm_tile.c new file mode 100644 index 00000000000..48edf55edb3 --- /dev/null +++ b/src/discof/restore/fd_snapwm_tile.c @@ -0,0 +1,382 @@ +#include "fd_snapwm_tile_private.h" +#include "utils/fd_ssctrl.h" +#include "utils/fd_vinyl_io_wd.h" + +#include "../../disco/topo/fd_topo.h" +#include "../../disco/metrics/fd_metrics.h" +#include "../../flamenco/runtime/sysvar/fd_sysvar_slot_history.h" +#include "../../flamenco/runtime/fd_system_ids.h" + +#define NAME "snapwm" + +/* The snapwm tile is a state machine responsible for loading accounts + into vinyl database. It processes pre-assembled bstream pairs + and handles vinyl's meta_map and bstream actual allocation. */ + +static inline int +should_shutdown( fd_snapwm_tile_t * ctx ) { + if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_SHUTDOWN ) ) { + FD_LOG_NOTICE(( "loaded %.1fM accounts from snapshot in %.3f seconds", (double)ctx->metrics.accounts_inserted/1e6, (double)(fd_log_wallclock()-ctx->boot_timestamp)/1e9 )); + } + return ctx->state==FD_SNAPSHOT_STATE_SHUTDOWN; +} + +static ulong +scratch_align( void ) { + return 512UL; +} + +static ulong +scratch_footprint( fd_topo_tile_t const * tile ) { + (void)tile; + ulong l = FD_LAYOUT_INIT; + l = FD_LAYOUT_APPEND( l, alignof(fd_snapwm_tile_t), sizeof(fd_snapwm_tile_t) ); + l = FD_LAYOUT_APPEND( l, fd_vinyl_io_wd_align(), fd_vinyl_io_wd_footprint( tile->snapwm.snapwr_depth ) ); + l = FD_LAYOUT_APPEND( l, fd_vinyl_io_mm_align(), fd_vinyl_io_mm_footprint( FD_SNAPWM_IO_SPAD_MAX ) ); + return FD_LAYOUT_FINI( l, scratch_align() ); +} + +static void +metrics_write( fd_snapwm_tile_t * ctx ) { + FD_MGAUGE_SET( SNAPIN, FULL_BYTES_READ, ctx->metrics.full_bytes_read ); + FD_MGAUGE_SET( SNAPIN, INCREMENTAL_BYTES_READ, ctx->metrics.incremental_bytes_read ); + FD_MGAUGE_SET( SNAPIN, ACCOUNTS_INSERTED, ctx->metrics.accounts_inserted ); + FD_MGAUGE_SET( SNAPIN, STATE, 
(ulong)ctx->state ); +} + +static void +transition_malformed( fd_snapwm_tile_t * ctx, + fd_stem_context_t * stem ) { + ctx->state = FD_SNAPSHOT_STATE_ERROR; + fd_stem_publish( stem, ctx->out_ct_idx, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL ); +} + +/* verify_slot_deltas_with_slot_history verifies the 'SlotHistory' + sysvar account after loading a snapshot. The full database + architecture is only instantiated after snapshot loading, so this + function uses a primitive/cache-free mechanism to query the parts of + the account database that are available. + + Returns 0 if verification passed, -1 if not. */ + +static int +verify_slot_deltas_with_slot_history( fd_snapwm_tile_t * ctx ) { + /* Do a raw read of the slot history sysvar account from the database. + Requires approx 500kB stack space. */ + + fd_account_meta_t meta; + uchar data[ FD_SYSVAR_SLOT_HISTORY_BINCODE_SZ ]; + union { + uchar buf[ FD_SYSVAR_SLOT_HISTORY_FOOTPRINT ]; + fd_slot_history_global_t o; + } decoded; + FD_STATIC_ASSERT( offsetof( __typeof__(decoded), buf)==offsetof( __typeof__(decoded), o ), memory_layout ); + fd_snapwm_vinyl_read_account( ctx, &fd_sysvar_slot_history_id, &meta, data, sizeof(data) ); + + if( FD_UNLIKELY( !meta.lamports || !meta.dlen ) ) { + FD_LOG_WARNING(( "SlotHistory sysvar account missing or empty" )); + return -1; + } + if( FD_UNLIKELY( meta.dlen > FD_SYSVAR_SLOT_HISTORY_BINCODE_SZ ) ) { + FD_LOG_WARNING(( "SlotHistory sysvar account data too large: %u bytes", meta.dlen )); + return -1; + } + if( FD_UNLIKELY( !fd_memeq( meta.owner, fd_sysvar_owner_id.uc, sizeof(fd_pubkey_t) ) ) ) { + FD_BASE58_ENCODE_32_BYTES( meta.owner, owner_b58 ); + FD_LOG_WARNING(( "SlotHistory sysvar owner is invalid: %s != sysvar_owner_id", owner_b58 )); + return -1; + } + + if( FD_UNLIKELY( + !fd_bincode_decode_static_global( + slot_history, + &decoded.o, + data, + meta.dlen, + NULL ) + ) ) { + FD_LOG_WARNING(( "SlotHistory sysvar account data is corrupt" )); + return -1; + } + + ulong 
txncache_entries_len = fd_ulong_load_8( ctx->txncache_entries_len_ptr ); + if( FD_UNLIKELY( !txncache_entries_len ) ) FD_LOG_WARNING(( "txncache_entries_len %lu", txncache_entries_len )); + + for( ulong i=0UL; itxncache_entries[i]; + if( FD_UNLIKELY( fd_sysvar_slot_history_find_slot( &decoded.o, entry->slot )!=FD_SLOT_HISTORY_SLOT_FOUND ) ) { + FD_LOG_WARNING(( "slot %lu missing from SlotHistory sysvar account", entry->slot )); + return -1; + } + } + return 0; +} + +static int +handle_data_frag( fd_snapwm_tile_t * ctx, + ulong chunk, + ulong acc_cnt, + fd_stem_context_t * stem ) { + if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_FINISHING ) ) { + transition_malformed( ctx, stem ); + return 0; + } + else if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) { + /* Ignore all data frags after observing an error in the stream until + we receive fail & init control messages to restart processing. */ + return 0; + } + else if( FD_UNLIKELY( ctx->state!=FD_SNAPSHOT_STATE_PROCESSING ) ) { + FD_LOG_ERR(( "invalid state for data frag %d", ctx->state )); + } + + FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark && (acc_cnt*(16UL<<20))<=ctx->in.mtu ); + + fd_snapwm_vinyl_process_account( ctx, chunk, acc_cnt ); + + return 0; +} + +static void +handle_control_frag( fd_snapwm_tile_t * ctx, + fd_stem_context_t * stem, + ulong sig ) { + switch( sig ) { + case FD_SNAPSHOT_MSG_CTRL_INIT_FULL: + case FD_SNAPSHOT_MSG_CTRL_INIT_INCR: + FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE ); + ctx->state = FD_SNAPSHOT_STATE_PROCESSING; + ctx->full = sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL; + if( sig==FD_SNAPSHOT_MSG_CTRL_INIT_INCR ) { + fd_snapwm_vinyl_txn_begin( ctx ); + } + fd_snapwm_vinyl_wd_init( ctx ); + break; + + case FD_SNAPSHOT_MSG_CTRL_FAIL: + FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING || + ctx->state==FD_SNAPSHOT_STATE_ERROR ); + ctx->state = FD_SNAPSHOT_STATE_IDLE; + + fd_snapwm_vinyl_wd_fini( ctx ); + if( ctx->vinyl.txn_active ) { + fd_snapwm_vinyl_txn_cancel( ctx ); + } + 
break; + + case FD_SNAPSHOT_MSG_CTRL_NEXT: { + FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING || + ctx->state==FD_SNAPSHOT_STATE_ERROR ); + ctx->state = FD_SNAPSHOT_STATE_IDLE; + + fd_snapwm_vinyl_wd_fini( ctx ); + if( ctx->vinyl.txn_active ) { + fd_snapwm_vinyl_txn_commit( ctx ); + } + break; + } + + case FD_SNAPSHOT_MSG_CTRL_DONE: { + FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING || + ctx->state==FD_SNAPSHOT_STATE_ERROR ); + ctx->state = FD_SNAPSHOT_STATE_IDLE; + + fd_snapwm_vinyl_wd_fini( ctx ); + if( ctx->vinyl.txn_active ) { + fd_snapwm_vinyl_txn_commit( ctx ); + } + + if( FD_UNLIKELY( verify_slot_deltas_with_slot_history( ctx ) ) ) { + FD_LOG_WARNING(( "slot deltas verification failed" )); + transition_malformed( ctx, stem ); + break; + } + break; + } + + case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN: + FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE ); + ctx->state = FD_SNAPSHOT_STATE_SHUTDOWN; + fd_snapwm_vinyl_shutdown( ctx ); + break; + + case FD_SNAPSHOT_MSG_CTRL_ERROR: + ctx->state = FD_SNAPSHOT_STATE_ERROR; + fd_snapwm_vinyl_wd_fini( ctx ); + if( ctx->vinyl.txn_active ) { + fd_snapwm_vinyl_txn_cancel( ctx ); + } + break; + + default: + FD_LOG_ERR(( "unexpected control sig %lu", sig )); + return; + } + + /* Forward the control message down the pipeline */ + fd_stem_publish( stem, ctx->out_ct_idx, sig, 0UL, 0UL, 0UL, 0UL, 0UL ); +} + +static inline int +returnable_frag( fd_snapwm_tile_t * ctx, + ulong in_idx FD_PARAM_UNUSED, + ulong seq FD_PARAM_UNUSED, + ulong sig, + ulong chunk, + ulong sz, + ulong ctl FD_PARAM_UNUSED, + ulong tsorig FD_PARAM_UNUSED, + ulong tspub FD_PARAM_UNUSED, + fd_stem_context_t * stem ) { + FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN ); + + ctx->stem = stem; + if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_DATA ) ) return handle_data_frag( ctx, chunk, sz/*acc_cnt*/, stem ); + else handle_control_frag( ctx, stem, sig ); + ctx->stem = NULL; + + return 0; +} + +static ulong +populate_allowed_fds( fd_topo_t const * topo FD_PARAM_UNUSED, + 
fd_topo_tile_t const * tile FD_PARAM_UNUSED, + ulong out_fds_cnt, + int * out_fds ) { + if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt )); + + ulong out_cnt = 0; + out_fds[ out_cnt++ ] = 2UL; /* stderr */ + if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) ) { + out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */ + } + + return out_cnt; +} + +static ulong +populate_allowed_seccomp( fd_topo_t const * topo, + fd_topo_tile_t const * tile, + ulong out_cnt, + struct sock_filter * out ) { + (void)topo; (void)tile; + return fd_snapwm_vinyl_seccomp( out_cnt, out ); +} + +static void +privileged_init( fd_topo_t * topo, + fd_topo_tile_t * tile ) { + fd_snapwm_tile_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id ); + memset( ctx, 0, sizeof(fd_snapwm_tile_t) ); + FD_TEST( fd_rng_secure( &ctx->seed, 8UL ) ); + + if( !tile->snapwm.lthash_disabled ) { + FD_LOG_WARNING(( "lthash verficiation for vinyl not yet implemented" )); + tile->snapwm.lthash_disabled = 1; + } + + fd_snapwm_vinyl_privileged_init( ctx, topo, tile ); +} + +static inline fd_snapwm_out_link_t +out1( fd_topo_t const * topo, + fd_topo_tile_t const * tile, + char const * name ) { + ulong idx = fd_topo_find_tile_out_link( topo, tile, name, 0UL ); + + if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_snapwm_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0, .mtu = 0 }; + + ulong mtu = topo->links[ tile->out_link_id[ idx ] ].mtu; + if( FD_UNLIKELY( mtu==0UL ) ) return (fd_snapwm_out_link_t){ .idx = idx, .mem = NULL, .chunk0 = ULONG_MAX, .wmark = ULONG_MAX, .chunk = ULONG_MAX, .mtu = mtu }; + + void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp; + ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache ); + ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, mtu ); + return (fd_snapwm_out_link_t){ .idx = 
idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0, .mtu = mtu }; +} + +FD_FN_UNUSED static void +unprivileged_init( fd_topo_t * topo, + fd_topo_tile_t * tile ) { + void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id ); + + FD_SCRATCH_ALLOC_INIT( l, scratch ); + fd_snapwm_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_snapwm_tile_t), sizeof(fd_snapwm_tile_t) ); + void * _io_wd = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_io_wd_align(), fd_vinyl_io_wd_footprint( tile->snapwm.snapwr_depth ) ); + void * _io_mm = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_io_mm_align(), fd_vinyl_io_mm_footprint( FD_SNAPWM_IO_SPAD_MAX ) ); + + ctx->full = 1; + ctx->state = FD_SNAPSHOT_STATE_IDLE; + ctx->lthash_disabled = tile->snapwm.lthash_disabled; + + ctx->boot_timestamp = fd_log_wallclock(); + + fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) ); + + if( FD_UNLIKELY( tile->kind_id ) ) FD_LOG_ERR(( "There can only be one `" NAME "` tile" )); + if( FD_UNLIKELY( tile->in_cnt!=2UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu ins, expected 2", tile->in_cnt )); + + ulong out_link_ct_idx = fd_topo_find_tile_out_link( topo, tile, "snapwm_ct", 0UL ); + if( out_link_ct_idx==ULONG_MAX ) out_link_ct_idx = fd_topo_find_tile_out_link( topo, tile, "snapwm_ls", 0UL ); + if( FD_UNLIKELY( out_link_ct_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `" NAME "` missing required out link `snapwm_ct` or `snapwm_ls`" )); + fd_topo_link_t * snapwm_out_link = &topo->links[ tile->out_link_id[ out_link_ct_idx ] ]; + ctx->out_ct_idx = out_link_ct_idx; + + if( 0==strcmp( snapwm_out_link->name, "snapwm_ls" ) ) { + ctx->hash_out = out1( topo, tile, "snapwm_ls" ); + } + + for( ulong i=0UL; iin_cnt; i++ ) { + fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ i ] ]; + if( 0==strcmp( in_link->name, "snapin_wm" ) ) { + fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ]; + ctx->in.wksp = in_wksp->wksp; + ctx->in.chunk0 = fd_dcache_compact_chunk0( 
ctx->in.wksp, in_link->dcache ); + ctx->in.wmark = fd_dcache_compact_wmark( ctx->in.wksp, in_link->dcache, in_link->mtu ); + ctx->in.mtu = in_link->mtu; + ctx->in.pos = 0UL; + } else if( 0==strcmp( in_link->name, "snapin_txn" ) ) { + fd_topo_wksp_t * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ]; + ulong chunk0 = fd_dcache_compact_chunk0( in_wksp->wksp, in_link->dcache ); + fd_sstxncache_entry_t * txncache_base = fd_chunk_to_laddr( in_wksp->wksp, chunk0 ); + ctx->txncache_entries_len_ptr = (ulong*)txncache_base; + FD_TEST( sizeof(ulong)<=sizeof(fd_sstxncache_entry_t) ); + ctx->txncache_entries = txncache_base + 1UL; + } else { + FD_LOG_ERR(( "tile `" NAME "` unrecognized in link %s", in_link->name )); + } + } + FD_TEST( !!ctx->in.wksp ); + FD_TEST( !!ctx->txncache_entries ); + + fd_snapwm_vinyl_unprivileged_init( ctx, topo, tile, _io_mm, _io_wd ); +} + +/* Control fragments can result in one extra publish to forward the + message down the pipeline, in addition to the result / malformed + message. Can send one duplicate account message as well. 
*/ +#define STEM_BURST 3UL + +#define STEM_LAZY 1000L + +#define STEM_CALLBACK_CONTEXT_TYPE fd_snapwm_tile_t +#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_snapwm_tile_t) + +#define STEM_CALLBACK_SHOULD_SHUTDOWN should_shutdown +#define STEM_CALLBACK_METRICS_WRITE metrics_write +#define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag + +#include "../../disco/stem/fd_stem.c" + +fd_topo_run_tile_t fd_tile_snapwm = { + .name = NAME, + .populate_allowed_fds = populate_allowed_fds, + .populate_allowed_seccomp = populate_allowed_seccomp, + .scratch_align = scratch_align, + .scratch_footprint = scratch_footprint, + .privileged_init = privileged_init, + .unprivileged_init = unprivileged_init, + .run = stem_run, +}; + +#undef NAME diff --git a/src/discof/restore/fd_snapwm_tile_private.h b/src/discof/restore/fd_snapwm_tile_private.h new file mode 100644 index 00000000000..10b0c7b5afa --- /dev/null +++ b/src/discof/restore/fd_snapwm_tile_private.h @@ -0,0 +1,173 @@ +#ifndef HEADER_fd_discof_restore_fd_snapwm_tile_private_h +#define HEADER_fd_discof_restore_fd_snapwm_tile_private_h + +/* fd_snapwm_tile_private.h contains private APIs for the "snapwm" tile, + which is the tile responsible for directing vinyl database writes. */ + +#include "utils/fd_slot_delta_parser.h" +#include "../../disco/stem/fd_stem.h" +#include "../../disco/topo/fd_topo.h" +#include "../../vinyl/io/fd_vinyl_io.h" +#include "../../vinyl/meta/fd_vinyl_meta.h" + +struct fd_snapwm_out_link { + ulong idx; + fd_wksp_t * mem; + ulong chunk0; + ulong wmark; + ulong chunk; + ulong mtu; +}; +typedef struct fd_snapwm_out_link fd_snapwm_out_link_t; + +struct fd_snapwm_tile { + int state; + uint full : 1; /* loading a full snapshot? */ + uint lthash_disabled : 1; /* disable lthash checking? 
*/ + + ulong seed; + long boot_timestamp; + + fd_stem_context_t * stem; + + fd_sstxncache_entry_t * txncache_entries; + ulong * txncache_entries_len_ptr; + + struct { + ulong full_bytes_read; + ulong incremental_bytes_read; + ulong accounts_inserted; + } metrics; + + struct { + fd_wksp_t * wksp; + ulong chunk0; + ulong wmark; + ulong mtu; + ulong pos; + } in; + + ulong out_ct_idx; + fd_snapwm_out_link_t hash_out; + + struct { + uchar * bstream_mem; + ulong bstream_sz; + + /* Vinyl in either io_wd or io_mm mode */ + fd_vinyl_io_t * io; + fd_vinyl_io_t * io_wd; + fd_vinyl_io_t * io_mm; + ulong io_seed; + + fd_vinyl_meta_t map[1]; + + ulong txn_seq; /* bstream seq of first txn record (in [seq_past,seq_present]) */ + uint txn_active : 1; + } vinyl; +}; + +typedef struct fd_snapwm_tile fd_snapwm_tile_t; + +FD_PROTOTYPES_BEGIN + +#define FD_SNAPWM_IO_SPAD_MAX (64UL<<20) /* 64 MiB of I/O scratch space */ + +/* fd_snapwm_vinyl_privileged_init performs administrative tasks, such + as opening and mapping the bstream file descriptor. */ + +void +fd_snapwm_vinyl_privileged_init( fd_snapwm_tile_t * ctx, + fd_topo_t * topo, + fd_topo_tile_t * tile ); + +/* fd_snapwm_vinyl_unprivileged_init performs setup tasks after being + sandboxed. (anything that might be exposed to untrusted data) */ + +void +fd_snapwm_vinyl_unprivileged_init( fd_snapwm_tile_t * ctx, + fd_topo_t * topo, + fd_topo_tile_t * tile, + void * io_mm_mem, + void * io_wd_mem ); + +/* fd_snapwm_vinyl_seccomp returns a seccomp sandbox policy suitable + for vinyl operation. */ + +ulong +fd_snapwm_vinyl_seccomp( ulong out_cnt, + struct sock_filter * out ); + +/* fd_snapwm_vinyl_reset pauses the snapwr tile (waits for the snapwr + tile to ack) and formats a bstream file to be empty. THIS IS A + DESTRUCTIVE ACTION. */ + +void +fd_snapwm_vinyl_reset( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_txn_begin starts a transactional burst write. + Assumes vinyl uses the io_mm backend. 
The write can then either be + committed or cancelled. There is no practical limit on the size of + this burst. */ + +void +fd_snapwm_vinyl_txn_begin( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_txn_commit finishes a transactional burst write. + Assumes vinyl uses the io_mm backend. Reads through bstream records + written since txn_begin was called and updates the vinyl_meta index. */ + +void +fd_snapwm_vinyl_txn_commit( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_txn_cancel abandons a transactional burst write. + Assumes vinyl uses the io_mm backend. Reverts the bstream state to + when txn_begin was called. */ + +void +fd_snapwm_vinyl_txn_cancel( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_wd_init transitions the vinyl backend from generic + vinyl accessor (io_mm) to fast dumb direct account insertion (io_wd). + This must be called before calling fd_snapwm_vinyl_process_account. + Starts the snapwr tile (waits for the snapwr tile to ack). */ + +void +fd_snapwm_vinyl_wd_init( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_wd_fini transitions the vinyl backend from fast dumb + direct account insertion (io_wd) back to generic mode (io_mm). + Pauses the snapwr tile (waits for the snapwr tile to ack). */ + +void +fd_snapwm_vinyl_wd_fini( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_shutdown instructs vinyl-related tiles of the loader + to shut down. Blocks until all affected tiles have acknowledged the + shutdown signal. */ + +void +fd_snapwm_vinyl_shutdown( fd_snapwm_tile_t * ctx ); + +/* fd_snapwm_vinyl_process_account reads a set of pre-generated bstream + pairs and decides whether to actually add them to the vinyl database. + It supports batch mode as well as single account (pair). */ + +void +fd_snapwm_vinyl_process_account( fd_snapwm_tile_t * ctx, + ulong chunk, + ulong acc_cnt ); + +/* fd_snapwm_vinyl_read_account retrieves an account from the vinyl + database. 
*/ + +void +fd_snapwm_vinyl_read_account( fd_snapwm_tile_t * ctx, + void const * acct_addr, + fd_account_meta_t * meta, + uchar * data, + ulong data_max ); + +FD_PROTOTYPES_END + +#endif /* HEADER_fd_discof_restore_fd_snapwm_tile_private_h */ diff --git a/src/discof/restore/fd_snapwm_tile_vinyl.c b/src/discof/restore/fd_snapwm_tile_vinyl.c new file mode 100644 index 00000000000..16f0533dcf4 --- /dev/null +++ b/src/discof/restore/fd_snapwm_tile_vinyl.c @@ -0,0 +1,564 @@ +#define _DEFAULT_SOURCE /* madvise */ +#include "fd_snapwm_tile_private.h" +#include "utils/fd_ssctrl.h" +#include "utils/fd_vinyl_io_wd.h" + +#include +#include /* open */ +#include /* mmap, madvise */ +#include /* fstat */ +#include /* close */ + +#include "generated/fd_snapwm_tile_vinyl_seccomp.h" + +/**********************************************************************\ + + Vinyl 101: + - Vinyl is Firedancer's main account database + - Vinyl is comprised of several components on-disk and in-memory + - vinyl_bstream is a single file containing all vinyl records + - vinyl_bstream is the source of truth + - vinyl_meta indexes the latest revisions of all elements in + vinyl_bstream + - Vinyl has an in-memory caching layer, but snapwm does not use it + + The snapshot loader must: + - Load the most recent version of each account into bstream + - Create a full vinyl_meta index of accounts + - Recover from load failures and retry + + Note on I/O layers: + - io_mm is the slow/generic memory mapped I/O backend. + - io_wd is the fast/dumb O_DIRECT backend. Can only append, thus used + for hot path account writing. + - io_mm and io_wd cannot be active at the same time -- snapwm will + switch between them as necessary. 
+ + Full snapshot logic: + - Write accounts to bstream (io_wd) + - Synchronously populate the vinyl_meta index while writing + - On load failure, destroy and recreate the bstream (io_mm) + + Incremental snapshot logic: + - Phase 1: while reading the incremental snapshot + - Write accounts to bstream without updating the index (io_wd) + - On load failure, undo writes done to bstream (io_mm) + - Phase 2: once read is done + - Replay all elements written to bstream (io_mm) + - Populate the vinyl_meta index while replaying + +\**********************************************************************/ + +void +fd_snapwm_vinyl_privileged_init( fd_snapwm_tile_t * ctx, + fd_topo_t * topo, + fd_topo_tile_t * tile ) { + void * shmap = fd_topo_obj_laddr( topo, tile->snapwm.vinyl_meta_map_obj_id ); + void * shele = fd_topo_obj_laddr( topo, tile->snapwm.vinyl_meta_pool_obj_id ); + + FD_TEST( fd_vinyl_meta_join( ctx->vinyl.map, shmap, shele ) ); + + /* Set up io_mm dependencies */ + + char const * bstream_path = tile->snapwm.vinyl_path; + int bstream_fd = open( bstream_path, O_RDWR|O_CLOEXEC, 0644 ); + if( FD_UNLIKELY( bstream_fd<0 ) ) { + FD_LOG_ERR(( "open(%s,O_RDWR|O_CLOEXEC,0644) failed (%i-%s)", + bstream_path, errno, fd_io_strerror( errno ) )); + } + + struct stat st; + if( FD_UNLIKELY( fstat( bstream_fd, &st )!=0 ) ) { + FD_LOG_ERR(( "fstat(%s) failed (%i-%s)", + bstream_path, errno, fd_io_strerror( errno ) )); + } + ulong bstream_sz = (ulong)st.st_size; + if( FD_UNLIKELY( !fd_ulong_is_aligned( bstream_sz, FD_VINYL_BSTREAM_BLOCK_SZ ) ) ) { + FD_LOG_ERR(( "vinyl file %s has misaligned size (%lu bytes)", bstream_path, bstream_sz )); + } + + void * bstream_mem = mmap( NULL, bstream_sz, PROT_READ|PROT_WRITE, MAP_SHARED, bstream_fd, 0 ); + if( FD_UNLIKELY( bstream_mem==MAP_FAILED ) ) { + FD_LOG_ERR(( "mmap(sz=%lu,PROT_READ|PROT_WRITE,MAP_SHARED,path=%s,off=0) failed (%i-%s)", + bstream_sz, bstream_path, errno, fd_io_strerror( errno ) )); + } + + if( FD_UNLIKELY( 0!=close( 
bstream_fd ) ) ) { /* clean up unused fd */ + FD_LOG_ERR(( "close(fd=%i) failed (%i-%s)", + bstream_fd, errno, fd_io_strerror( errno ) )); + } + + ctx->vinyl.bstream_mem = bstream_mem; + ctx->vinyl.bstream_sz = bstream_sz; + + FD_TEST( fd_rng_secure( &ctx->vinyl.io_seed, 8UL ) ); +} + +static void +io_mm_align_4k( fd_snapwm_tile_t * ctx ) { + fd_vinyl_io_t * io_mm = ctx->vinyl.io_mm; + if( FD_UNLIKELY( io_mm->seq_future!=0UL ) ) { + FD_LOG_CRIT(( "unexpected io_mm state (seq_future=%lu)", io_mm->seq_future )); + } + uchar * mmio = fd_vinyl_mmio ( io_mm ); + ulong mmio_sz = fd_vinyl_mmio_sz( io_mm ); + + ulong bstream_preamble = fd_ulong_align_up( FD_VINYL_BSTREAM_BLOCK_SZ, 4096UL ) - FD_VINYL_BSTREAM_BLOCK_SZ; + FD_CRIT( bstream_preamble<=mmio_sz, "bstream too small for 4k alignment" ); + + fd_memset( mmio, 0, bstream_preamble ); + io_mm->seq_present += bstream_preamble; + io_mm->seq_future += bstream_preamble; +} + +void +fd_snapwm_vinyl_unprivileged_init( fd_snapwm_tile_t * ctx, + fd_topo_t * topo, + fd_topo_tile_t * tile, + void * io_mm_mem, + void * io_wd_mem ) { + + /* Set up io_mm */ + + ctx->vinyl.io_mm = + fd_vinyl_io_mm_init( io_mm_mem, + FD_SNAPWM_IO_SPAD_MAX, + ctx->vinyl.bstream_mem, + ctx->vinyl.bstream_sz, + 1, + "accounts-v0", 12UL, + ctx->vinyl.io_seed ); + if( FD_UNLIKELY( !ctx->vinyl.io_mm ) ) { + FD_LOG_ERR(( "fd_vinyl_io_mm_init failed" )); + } + + /* Write out zero blocks to align the bstream by 4096 bytes + (Assuming a 512 byte sync block) */ + + io_mm_align_4k( ctx ); + + /* Set up io_wd dependencies */ + + ulong wr_link_id = fd_topo_find_tile_out_link( topo, tile, "snapwm_wh", 0UL ); + if( FD_UNLIKELY( wr_link_id==ULONG_MAX ) ) FD_LOG_CRIT(( "snapwm_wh link not found" )); + fd_topo_link_t * wr_link = &topo->links[ tile->out_link_id[ wr_link_id ] ]; + + if( FD_UNLIKELY( tile->snapwm.snapwr_depth != fd_mcache_depth( wr_link->mcache ) ) ) { + /* FIXME TOCTOU issue ... 
A malicious downstream tile could + theoretically corrupt mcache->depth and cause an OOB access + while snapwm is still initializing. Practically not an + issue because the system is not exposed to attacker- + controlled input at boot time. */ + FD_LOG_CRIT(( "snapwm_wr link mcache depth %lu does not match snapwr_depth %lu", + fd_mcache_depth( wr_link->mcache ), tile->snapwm.snapwr_depth )); + } + + if( FD_UNLIKELY( fd_topo_link_reliable_consumer_cnt( topo, wr_link )!=1UL ) ) { + FD_LOG_CRIT(( "snapwm_wr link must have exactly one reliable consumer" )); + } + + ulong wh_tile_id = fd_topo_find_tile( topo, "snapwh", 0UL ); + FD_TEST( wh_tile_id!=ULONG_MAX ); + fd_topo_tile_t * wh_tile = &topo->tiles[ wh_tile_id ]; + FD_TEST( wh_tile->in_cnt==1 ); + FD_TEST( wh_tile->in_link_id[0] == wr_link->id ); + FD_CRIT( 0==strcmp( topo->links[ wh_tile->in_link_id[ 0 ] ].name, "snapwm_wh" ), "unexpected link found" ); + ulong const * wh_fseq = wh_tile->in_link_fseq[ 0 ]; + if( FD_UNLIKELY( !wh_fseq ) ) { + FD_LOG_CRIT(( "snapwm_wr link reliable consumer fseq not found" )); + } + + /* Set up io_wd */ + + ctx->vinyl.io_wd = + fd_vinyl_io_wd_init( io_wd_mem, + ctx->vinyl.bstream_sz, + ctx->vinyl.io_mm->seed, + wr_link->mcache, + wr_link->dcache, + wh_fseq, + wr_link->mtu ); + if( FD_UNLIKELY( !ctx->vinyl.io_wd ) ) { + FD_LOG_ERR(( "fd_vinyl_io_wd_init failed" )); + } + + /* Start by using io_mm */ + + ctx->vinyl.io = ctx->vinyl.io_mm; +} + +ulong +fd_snapwm_vinyl_seccomp( ulong out_cnt, + struct sock_filter * out ) { + populate_sock_filter_policy_fd_snapwm_tile_vinyl( out_cnt, out, (uint)fd_log_private_logfile_fd() ); + return sock_filter_policy_fd_snapwm_tile_vinyl_instr_cnt; +} + +static void +vinyl_mm_sync( fd_snapwm_tile_t * ctx ) { + if( FD_UNLIKELY( 0!=msync( ctx->vinyl.bstream_mem, ctx->vinyl.bstream_sz, MS_SYNC ) ) ) { + FD_LOG_ERR(( "msync(addr=%p,sz=%lu,MS_SYNC) failed (%i-%s)", + (void *)ctx->vinyl.bstream_mem, ctx->vinyl.bstream_sz, + errno, fd_io_strerror( errno ) )); + 
} +} + +/* Faster vinyl meta accesses *****************************************/ + +static fd_vinyl_meta_ele_t * +fd_vinyl_meta_prepare_nolock( fd_vinyl_meta_t * join, + fd_vinyl_key_t const * key, + ulong memo ) { + fd_vinyl_meta_ele_t * ele0 = join->ele; + ulong ele_max = join->ele_max; + ulong probe_max = join->probe_max; + void * ctx = join->ctx; + + ulong start_idx = memo & (ele_max-1UL); + + for(;;) { + + ulong ele_idx = start_idx; + + for( ulong probe_rem=probe_max; probe_rem; probe_rem-- ) { + fd_vinyl_meta_ele_t * ele = ele0 + ele_idx; + + if( FD_LIKELY( fd_vinyl_meta_private_ele_is_free( ctx, ele ) ) || /* opt for low collision */ + ( + FD_LIKELY( ele->memo==memo ) && + FD_LIKELY( fd_vinyl_key_eq( &ele->phdr.key, key ) ) /* opt for already in map */ + ) ) { + return ele; + } + + ele_idx = (ele_idx+1UL) & (ele_max-1UL); + } + + return NULL; + + } + + /* never get here */ +} + +/* Transactional APIs *************************************************/ + +void +fd_snapwm_vinyl_txn_begin( fd_snapwm_tile_t * ctx ) { + FD_CRIT( !ctx->vinyl.txn_active, "txn_begin called while already in txn" ); + FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); + fd_vinyl_io_t * io = ctx->vinyl.io_mm; + + /* Finish any outstanding writes */ + int commit_err = fd_vinyl_io_commit( io, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); + + ctx->vinyl.txn_seq = io->seq_present; + ctx->vinyl.txn_active = 1; +} + +void +fd_snapwm_vinyl_txn_commit( fd_snapwm_tile_t * ctx ) { + FD_CRIT( ctx->vinyl.txn_active, "txn_commit called while not in txn" ); + FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); + fd_vinyl_io_t * io = ctx->vinyl.io_mm; + + long dt = -fd_log_wallclock(); + + /* Finish any outstanding writes */ + + int commit_err = fd_vinyl_io_commit( io, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( 
"fd_vinyl_io_commit failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); + + /* Hint to kernel to start prefetching to speed up reads */ + + uchar * mmio = fd_vinyl_mmio ( io ); FD_TEST( mmio ); + ulong mmio_sz = fd_vinyl_mmio_sz( io ); + + ulong txn_seq0 = ctx->vinyl.txn_seq; + ulong txn_seq1 = ctx->vinyl.io_mm->seq_present; + FD_LOG_INFO(( "vinyl txn_commit starting for seq [%lu,%lu)", txn_seq0, txn_seq1 )); + ulong txn_sz = txn_seq1-txn_seq0; + FD_CRIT( fd_vinyl_seq_le( txn_seq0, txn_seq1 ), "invalid txn seq range" ); + FD_CRIT( txn_seq1 <= mmio_sz, "invalid txn seq range" ); + if( FD_UNLIKELY( fd_vinyl_seq_eq( txn_seq0, txn_seq1 ) ) ) return; + + void * madv_base = (void *)fd_ulong_align_dn( (ulong)mmio+txn_seq0, FD_SHMEM_NORMAL_PAGE_SZ ); + ulong madv_sz = /* */fd_ulong_align_up( txn_sz, FD_SHMEM_NORMAL_PAGE_SZ ); + if( FD_UNLIKELY( madvise( madv_base, madv_sz, MADV_SEQUENTIAL ) ) ) { + FD_LOG_WARNING(( "madvise(addr=%p,sz=%lu,MADV_SEQUENTIAL) failed (%i-%s)", + madv_base, madv_sz, + errno, fd_io_strerror( errno ) )); + } + + /* Replay incremental account updates */ + + fd_vinyl_meta_t * meta_map = ctx->vinyl.map; + for( ulong seq=txn_seq0; fd_vinyl_seq_lt( seq, txn_seq1 ); ) { + fd_vinyl_bstream_block_t * block = (void *)( mmio+seq ); + + /* Speculatively read block info */ + ulong ctl = FD_VOLATILE_CONST( block->ctl ); + fd_vinyl_bstream_phdr_t phdr = FD_VOLATILE_CONST( block->phdr ); + + ulong val_esz = fd_vinyl_bstream_ctl_sz ( ctl ); + int block_type = fd_vinyl_bstream_ctl_type( ctl ); + ulong block_sz; + + if( FD_LIKELY( block_type==FD_VINYL_BSTREAM_CTL_TYPE_PAIR ) ) { + block_sz = fd_vinyl_bstream_pair_sz( val_esz ); + ulong memo = fd_vinyl_key_memo( meta_map->seed, &phdr.key ); + fd_vinyl_meta_ele_t * ele = fd_vinyl_meta_prepare_nolock( meta_map, &phdr.key, memo ); + if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "fd_vinyl_meta_prepare failed (full)" )); + + /* Erase value if existing is newer */ + if( FD_UNLIKELY( fd_vinyl_meta_ele_in_use( ele ) 
) ) { /* key exists */ + ulong exist_slot = ele->phdr.info.ul[ 1 ]; + ulong cur_slot = phdr.info.ul[ 1 ]; + if( exist_slot > cur_slot ) { + fd_memset( block, 0, block_sz ); + goto next; + } + } + + /* Overwrite map entry */ + ele->memo = memo; + ele->phdr = phdr; + ele->seq = seq; + ele->line_idx = ULONG_MAX; + } else if( block_type==FD_VINYL_BSTREAM_CTL_TYPE_ZPAD ) { + block_sz = FD_VINYL_BSTREAM_BLOCK_SZ; + } else { + FD_LOG_CRIT(( "unexpected block type %d", block_type )); + } + + if( FD_UNLIKELY( !block_sz ) ) { + FD_LOG_CRIT(( "Invalid block header at vinyl seq %lu, ctl=%016lx (zero block_sz)", seq, ctl )); + } + if( FD_UNLIKELY( block_sz > 64UL<<20 ) ) { + FD_LOG_CRIT(( "Invalid block header at vinyl seq %lu, ctl=%016lx, block_sz=%lu (unreasonably large block size)", seq, ctl, block_sz )); + } + +next: + seq += block_sz; + } + + /* Persist above erases to disk */ + + int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); + vinyl_mm_sync( ctx ); + + dt += fd_log_wallclock(); + FD_LOG_INFO(( "vinyl txn_commit took %g seconds", (double)dt/1e9 )); +} + +void +fd_snapwm_vinyl_txn_cancel( fd_snapwm_tile_t * ctx ) { + FD_CRIT( ctx->vinyl.txn_active, "txn_cancel called while not in txn" ); + FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); + + fd_vinyl_io_t * io = ctx->vinyl.io_mm; + fd_vinyl_io_rewind( io, ctx->vinyl.txn_seq ); + fd_vinyl_io_sync ( io, FD_VINYL_IO_FLAG_BLOCKING ); +} + +/* Fast writer ********************************************************/ + +void +fd_snapwm_vinyl_wd_init( fd_snapwm_tile_t * ctx ) { + FD_CRIT( ctx->vinyl.io==ctx->vinyl.io_mm, "vinyl not in io_mm mode" ); + + int commit_err = fd_vinyl_io_commit( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io_mm) failed (%i-%s)", commit_err, 
fd_vinyl_strerror( commit_err ) )); + + /* Flush io_mm */ + + int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); + vinyl_mm_sync( ctx ); + + /* Synchronize sequence numbers */ + + ctx->vinyl.io_wd->seq_ancient = ctx->vinyl.io_mm->seq_ancient; + ctx->vinyl.io_wd->seq_past = ctx->vinyl.io_mm->seq_past; + ctx->vinyl.io_wd->seq_present = ctx->vinyl.io_mm->seq_present; + ctx->vinyl.io_wd->seq_future = ctx->vinyl.io_mm->seq_future; + ctx->vinyl.io_wd->spad_used = 0UL; + + ctx->vinyl.io = ctx->vinyl.io_wd; +} + +void +fd_snapwm_vinyl_wd_fini( fd_snapwm_tile_t * ctx ) { + if( FD_UNLIKELY( ctx->vinyl.io!=ctx->vinyl.io_wd ) ) return; + + int commit_err = fd_vinyl_io_commit( ctx->vinyl.io_wd, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io_wd) failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); + + /* Synchronize sequence numbers */ + + ctx->vinyl.io_mm->seq_ancient = ctx->vinyl.io_wd->seq_ancient; + ctx->vinyl.io_mm->seq_past = ctx->vinyl.io_wd->seq_past; + ctx->vinyl.io_mm->seq_present = ctx->vinyl.io_wd->seq_present; + ctx->vinyl.io_mm->seq_future = ctx->vinyl.io_wd->seq_future; + ctx->vinyl.io_mm->spad_used = 0UL; + + ctx->vinyl.io = ctx->vinyl.io_mm; +} + +/* bstream_alloc is a faster version of fd_vinyl_io_alloc. Indirect + calls have significant overhead on Zen 5. */ + +static uchar * +bstream_alloc( fd_vinyl_io_t * io, + ulong sz, + int flags ) { + if( FD_LIKELY( io->impl==&fd_vinyl_io_wd_impl ) ) + return fd_vinyl_io_wd_alloc( io, sz, flags ); + return fd_vinyl_io_alloc( io, sz, flags ); +} + +/* fd_snapwm_vinyl_process_account reads and processes a batch of + pre-generated bstream pairs, handles the meta_map, and determines + whether to forward each of the accounts (pairs) to the database. 
*/ + +void +fd_snapwm_vinyl_process_account( fd_snapwm_tile_t * ctx, + ulong chunk, + ulong acc_cnt ) { + fd_vinyl_io_t * io = ctx->vinyl.io; + fd_vinyl_meta_t * map = ctx->vinyl.map; + + uchar * src = fd_chunk_to_laddr( ctx->in.wksp, chunk ); + + for( ulong acc_i=0UL; acc_ictl ); + + ulong pair_sz = fd_vinyl_bstream_pair_sz( val_esz ); + uchar * pair = bstream_alloc( io, pair_sz, FD_VINYL_IO_FLAG_BLOCKING ); + uchar * dst = pair; + + ulong const account_header_slot = phdr->info.ul[1]; + + fd_vinyl_meta_ele_t * ele = NULL; + if( ctx->full ) { /* update index immediately */ + ulong memo = fd_vinyl_key_memo( map->seed, &phdr->key ); + ele = fd_vinyl_meta_prepare_nolock( map, &phdr->key, memo ); + if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "Failed to update vinyl index (full)" )); + + if( FD_UNLIKELY( fd_vinyl_meta_ele_in_use( ele ) ) ) { + /* Drop current value if existing is newer */ + ulong const exist_slot = ele->phdr.info.ul[ 1 ]; + if( FD_UNLIKELY( exist_slot > account_header_slot ) ) { + src += pair_sz; + continue; + } + } + + ele->memo = memo; + ele->phdr.ctl = phdr->ctl; + ele->phdr.key = phdr->key; + ele->phdr.info = phdr->info; + ele->seq = ULONG_MAX; /* later init */ + ele->line_idx = ULONG_MAX; + } + + fd_memcpy( dst, src, pair_sz ); + src += pair_sz; + + ulong seq_after = fd_vinyl_io_append( io, pair, pair_sz ); + if( ctx->full ) ele->seq = seq_after; + + ctx->metrics.accounts_inserted++; + } +} + +void +fd_snapwm_vinyl_shutdown( fd_snapwm_tile_t * ctx ) { + int commit_err = fd_vinyl_io_commit( ctx->vinyl.io, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( commit_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_commit(io) failed (%i-%s)", commit_err, fd_vinyl_strerror( commit_err ) )); + int sync_err = fd_vinyl_io_sync( ctx->vinyl.io_mm, FD_VINYL_IO_FLAG_BLOCKING ); + if( FD_UNLIKELY( sync_err ) ) FD_LOG_CRIT(( "fd_vinyl_io_sync(io_mm) failed (%i-%s)", sync_err, fd_vinyl_strerror( sync_err ) )); + vinyl_mm_sync( ctx ); + + fd_vinyl_io_wd_ctrl( ctx->vinyl.io_wd, 
FD_SNAPSHOT_MSG_CTRL_SHUTDOWN, 0UL ); +} + +void +fd_snapwm_vinyl_read_account( fd_snapwm_tile_t * ctx, + void const * acct_addr, + fd_account_meta_t * meta, + uchar * data, + ulong data_max ) { + if( FD_UNLIKELY( ctx->vinyl.io!=ctx->vinyl.io_mm ) ) { + FD_LOG_CRIT(( "vinyl not in io_mm mode" )); + } + + memset( meta, 0, sizeof(fd_account_meta_t) ); + + /* Query database index */ + + fd_vinyl_key_t key[1]; + fd_vinyl_key_init( key, acct_addr, 32UL ); + ulong memo = fd_vinyl_key_memo( ctx->vinyl.map->seed, key ); + fd_vinyl_meta_ele_t const * ele = fd_vinyl_meta_prepare_nolock( ctx->vinyl.map, key, memo ); + if( FD_UNLIKELY( !ele || !fd_vinyl_meta_ele_in_use( ele ) ) ) { + /* account not found */ + return; + } + + uchar * mmio = fd_vinyl_mmio ( ctx->vinyl.io_mm ); + ulong mmio_sz = fd_vinyl_mmio_sz( ctx->vinyl.io_mm ); + + /* Validate index record */ + + ulong const seq0 = ele->seq; + ulong const ctl = ele->phdr.ctl; + int const ctl_type = fd_vinyl_bstream_ctl_type( ctl ); + ulong const val_esz = fd_vinyl_bstream_ctl_sz ( ctl ); + ulong const pair_sz = fd_vinyl_bstream_pair_sz( val_esz ); + ulong const seq1 = seq0 + pair_sz; + ulong const seq_past = ctx->vinyl.io->seq_past; + ulong const seq_present = ctx->vinyl.io->seq_present; + if( FD_UNLIKELY( ctl_type!=FD_VINYL_BSTREAM_CTL_TYPE_PAIR ) ) { + FD_LOG_CRIT(( "corrupt bstream record in index: ctl=%016lx", ctl )); + } + if( FD_UNLIKELY( val_eszsizeof(fd_account_meta_t)+FD_RUNTIME_ACC_SZ_MAX ) ) { + FD_LOG_CRIT(( "corrupt bstream record in index: val_esz=%lu", val_esz )); + } + int bad_past = !(fd_vinyl_seq_le( seq_past, seq0 ) & fd_vinyl_seq_lt( seq0, seq1 ) & fd_vinyl_seq_le( seq1, seq_present )); + if( FD_UNLIKELY( bad_past ) ) { + FD_LOG_CRIT(( "corrupt bstream record in index: seq[%lu,%lu) not in [seq_past=%lu,seq_present=%lu)", + seq0, seq1, seq_past, seq_present )); + } + + /* Map seq range to underlying device + In the snapshot loader, it is safe to assume that bstream reads + do not wrap around. 
*/ + + if( FD_UNLIKELY( seq1>mmio_sz ) ) { + FD_LOG_CRIT(( "corrupt bstream record in index: seq[%lu,%lu) exceeds bstream addressable range [0,%lu)", + seq0, seq1, mmio_sz )); + } + + /* Read from bstream */ + + ulong seq_meta = seq0 + sizeof(fd_vinyl_bstream_phdr_t); + ulong seq_data = seq_meta + sizeof(fd_account_meta_t); + + memcpy( meta, mmio+seq_meta, sizeof(fd_account_meta_t) ); + if( FD_UNLIKELY( sizeof(fd_account_meta_t)+(ulong)meta->dlen > val_esz ) ) { + FD_LOG_CRIT(( "corrupt bstream record: seq0=%lu val_esz=%lu dlen=%u", seq0, val_esz, meta->dlen )); + } + if( FD_UNLIKELY( meta->dlen > data_max ) ) { + FD_BASE58_ENCODE_32_BYTES( acct_addr, acct_addr_b58 ); + FD_LOG_WARNING(( "failed to read account %s: account data size (%lu bytes) exceeds buffer size (%lu bytes)", + acct_addr_b58, (ulong)meta->dlen, data_max )); + } + memcpy( data, mmio+seq_data, meta->dlen ); +} diff --git a/src/discof/restore/fd_snapin_tile_vinyl.seccomppolicy b/src/discof/restore/fd_snapwm_tile_vinyl.seccomppolicy similarity index 100% rename from src/discof/restore/fd_snapin_tile_vinyl.seccomppolicy rename to src/discof/restore/fd_snapwm_tile_vinyl.seccomppolicy diff --git a/src/discof/restore/fd_snapwr_tile.c b/src/discof/restore/fd_snapwr_tile.c index 6ec9204d15a..20f8f41dee5 100644 --- a/src/discof/restore/fd_snapwr_tile.c +++ b/src/discof/restore/fd_snapwr_tile.c @@ -72,6 +72,10 @@ struct fd_snapwr { ulong * seq_sync; /* fseq->seq[0] */ uint idle_cnt; + ulong req_seen; + ulong tile_cnt; + ulong tile_idx; + struct { ulong last_off; } metrics; @@ -113,7 +117,6 @@ unprivileged_init( fd_topo_t * topo, fd_snapwr_t * snapwr = fd_topo_obj_laddr( topo, tile->tile_obj_id ); memset( &snapwr->metrics, 0, sizeof(snapwr->metrics) ); - if( FD_UNLIKELY( tile->kind_id ) ) FD_LOG_ERR(( "There can only be one `" NAME "` tile" )); if( FD_UNLIKELY( tile->in_cnt !=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu ins, expected 1", tile->in_cnt )); if( FD_UNLIKELY( tile->out_cnt!=0UL ) ) FD_LOG_ERR(( 
"tile `" NAME "` has %lu outs, expected 0", tile->out_cnt )); @@ -126,6 +129,10 @@ unprivileged_init( fd_topo_t * topo, snapwr->seq_sync = tile->in_link_fseq[ 0 ]; snapwr->state = FD_SNAPSHOT_STATE_IDLE; + + snapwr->req_seen = 0UL; + snapwr->tile_cnt = fd_topo_tile_name_cnt( topo, "snapwr" ); + snapwr->tile_idx = tile->kind_id; } static ulong @@ -203,6 +210,11 @@ handle_control_frag( fd_snapwr_t * ctx, } } +static int +should_process_wr_request( fd_snapwr_t * ctx ) { + return ctx->req_seen%ctx->tile_cnt==ctx->tile_idx; +} + /* handle_data_frag handles a bstream block sz-aligned write request. Does a synchronous blocking O_DIRECT write. */ @@ -219,11 +231,15 @@ handle_data_frag( fd_snapwr_t * ctx, FD_LOG_CRIT(( "vinyl bstream log is out of space" )); } - /* Do a synchronous write(2) */ - ssize_t write_sz = pwrite( ctx->dev_fd, src, src_sz, (off_t)dev_off ); - if( FD_UNLIKELY( write_sz<0 ) ) { - FD_LOG_ERR(( "pwrite(off=%lu,sz=%lu) failed (%i-%s)", dev_off, src_sz, errno, strerror( errno ) )); + if( FD_LIKELY( should_process_wr_request( ctx ) ) ) { + /* Do a synchronous write(2) */ + ssize_t write_sz = pwrite( ctx->dev_fd, src, src_sz, (off_t)dev_off ); + if( FD_UNLIKELY( write_sz<0 ) ) { + FD_LOG_ERR(( "pwrite(off=%lu,sz=%lu) failed (%i-%s)", dev_off, src_sz, errno, strerror( errno ) )); + } } + ctx->req_seen++; + ctx->metrics.last_off = dev_off+src_sz; } diff --git a/src/discof/restore/generated/fd_snapin_tile_vinyl_seccomp.h b/src/discof/restore/generated/fd_snapwm_tile_vinyl_seccomp.h similarity index 92% rename from src/discof/restore/generated/fd_snapin_tile_vinyl_seccomp.h rename to src/discof/restore/generated/fd_snapwm_tile_vinyl_seccomp.h index b46904ef6fd..61fd76094f2 100644 --- a/src/discof/restore/generated/fd_snapin_tile_vinyl_seccomp.h +++ b/src/discof/restore/generated/fd_snapwm_tile_vinyl_seccomp.h @@ -1,6 +1,6 @@ /* THIS FILE WAS GENERATED BY generate_filters.py. DO NOT EDIT BY HAND! 
*/ -#ifndef HEADER_fd_src_discof_restore_generated_fd_snapin_tile_vinyl_seccomp_h -#define HEADER_fd_src_discof_restore_generated_fd_snapin_tile_vinyl_seccomp_h +#ifndef HEADER_fd_src_discof_restore_generated_fd_snapwm_tile_vinyl_seccomp_h +#define HEADER_fd_src_discof_restore_generated_fd_snapwm_tile_vinyl_seccomp_h #if defined(__linux__) @@ -24,9 +24,9 @@ #else # error "Target architecture is unsupported by seccomp." #endif -static const unsigned int sock_filter_policy_fd_snapin_tile_vinyl_instr_cnt = 23; +static const unsigned int sock_filter_policy_fd_snapwm_tile_vinyl_instr_cnt = 23; -static void populate_sock_filter_policy_fd_snapin_tile_vinyl( ulong out_cnt, struct sock_filter * out, unsigned int logfile_fd ) { +static void populate_sock_filter_policy_fd_snapwm_tile_vinyl( ulong out_cnt, struct sock_filter * out, unsigned int logfile_fd ) { FD_TEST( out_cnt >= 23 ); struct sock_filter filter[23] = { /* Check: Jump to RET_KILL_PROCESS if the script's arch != the runtime arch */ @@ -82,4 +82,4 @@ static void populate_sock_filter_policy_fd_snapin_tile_vinyl( ulong out_cnt, str #endif /* defined(__linux__) */ -#endif /* HEADER_fd_src_discof_restore_generated_fd_snapin_tile_vinyl_seccomp_h */ +#endif /* HEADER_fd_src_discof_restore_generated_fd_snapwm_tile_vinyl_seccomp_h */