Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,14 @@ struct StreamQueueItem {
std::optional<at::Tensor> identities;
std::optional<at::Tensor> runtime_meta;
at::Tensor count;
StreamQueueItem(
at::Tensor src_indices,
at::Tensor src_weights,
std::optional<at::Tensor> src_identities,
std::optional<at::Tensor> src_runtime_meta,
at::Tensor src_count) {
indices = std::move(src_indices);
weights = std::move(src_weights);
identities = std::move(src_identities);
runtime_meta = std::move(src_runtime_meta);
count = std::move(src_count);
}
// Takes ownership of every argument by moving it into the matching member.
//
// NOTE: the member initializer list below is written in member DECLARATION
// order (indices, weights, identities, runtime_meta, count). C++ always
// initializes members in declaration order regardless of how the initializer
// list is written, so keeping the two in sync avoids the misleading mismatch
// (and the -Wreorder warning) the previous ordering produced.
StreamQueueItem(
    at::Tensor src_indices,
    at::Tensor src_weights,
    std::optional<at::Tensor> src_identities,
    std::optional<at::Tensor> src_runtime_meta,
    at::Tensor src_count)
    : indices(std::move(src_indices)),
      weights(std::move(src_weights)),
      identities(std::move(src_identities)),
      runtime_meta(std::move(src_runtime_meta)),
      count(std::move(src_count)) {}
};

class RawEmbeddingStreamer : public torch::jit::CustomClassHolder {
Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/include/fbgemm_gpu/utils/tensor_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ class PackedTensorAccessor
}

protected:
size_t numel_;
size_t numel_{};
char name_[NAME_MAX_LEN];
char context_[CONTEXT_MAX_LEN];

Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/src/config/feature_gates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ std::string to_string(const FeatureGateName& value) {
return "UNKNOWN";
}

bool ev_check_key(const std::string& key) {
static bool ev_check_key(const std::string& key) {
const auto env_var = "FBGEMM_" + key;

const auto value = std::getenv(env_var.c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ using Tensor = at::Tensor;
namespace fbgemm_gpu {

template <typename index_t>
void embedding_inplace_update_cpu_kernel(
static void embedding_inplace_update_cpu_kernel(
at::TensorAccessor<uint8_t, 1> dev_weights,
at::TensorAccessor<uint8_t, 1> uvm_weights,
const at::TensorAccessor<int32_t, 1>& weights_placements,
Expand Down Expand Up @@ -151,9 +151,9 @@ void dram_kv_embedding_inplace_update_cpu(

const uint8_t* weights_tys_ptr = weights_tys.data_ptr<uint8_t>();
const int32_t* D_offsets_ptr = D_offsets.data_ptr<int32_t>();
const uint8_t* update_weights_ptr = update_weights.data_ptr<uint8_t>();
uint8_t* update_weights_ptr = update_weights.mutable_data_ptr<uint8_t>();
const int32_t* update_table_idx_ptr = update_table_idx.data_ptr<int32_t>();
const int64_t* update_row_idx_ptr = update_row_idx.data_ptr<int64_t>();
int64_t* update_row_idx_ptr = update_row_idx.mutable_data_ptr<int64_t>();
const int64_t* update_offsets_ptr = update_offsets.data_ptr<int64_t>();

int64_t window_start = 0;
Expand All @@ -172,15 +172,13 @@ void dram_kv_embedding_inplace_update_cpu(
int32_t D_bytes =
nbit::padded_row_size_in_bytes(D, weight_ty, row_alignment);

uint8_t* batched_weights_ptr = const_cast<uint8_t*>(
update_weights_ptr + update_offsets_ptr[window_start]);
uint8_t* batched_weights_ptr = update_weights_ptr + update_offsets_ptr[window_start];
auto weights_tensor = at::from_blob(
batched_weights_ptr,
{window_size, D_bytes},
at::TensorOptions().dtype(at::kByte));

int64_t* row_ids_ptr =
const_cast<int64_t*>(update_row_idx_ptr + window_start);
int64_t* row_ids_ptr = update_row_idx_ptr + window_start;
auto row_id_tensor = at::from_blob(
row_ids_ptr, {window_size}, at::TensorOptions().dtype(at::kLong));

Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/src/faster_hash_ops/faster_hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ std::tuple<Tensor, Tensor> create_zch_buffer_cpu(
return {identity, metadata};
}

void zero_collision_hash_cpu_out(
static void zero_collision_hash_cpu_out(
Tensor& output,
const Tensor& input,
const Tensor& identities,
Expand Down
14 changes: 7 additions & 7 deletions fbgemm_gpu/src/input_combine_ops/input_combine_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using Tensor = at::Tensor;

namespace fbgemm_gpu {

void _cat_int_tensors_out(
static void _cat_int_tensors_out(
Tensor& combined_tensors,
const std::vector<Tensor>& tensor_list,
int64_t total_num,
Expand Down Expand Up @@ -82,7 +82,7 @@ void _cat_int_tensors_out(
}
}

Tensor _cat_int_tensors(
static Tensor _cat_int_tensors(
const std::vector<Tensor>& tensor_list,
int64_t total_num,
bool use_pin_memory,
Expand All @@ -107,7 +107,7 @@ Tensor _cat_int_tensors(
return combined_tensors;
}

Tensor _cat_int_tensors_with_padding(
static Tensor _cat_int_tensors_with_padding(
const std::vector<Tensor>& tensor_list,
int64_t total_num,
bool use_pin_memory,
Expand Down Expand Up @@ -140,7 +140,7 @@ Tensor _cat_int_tensors_with_padding(
return combined_tensors;
}

void _cat_per_sample_weights_list_out(
static void _cat_per_sample_weights_list_out(
Tensor& out,
const std::vector<Tensor>& per_sample_weights,
const std::vector<Tensor>& indices_list,
Expand Down Expand Up @@ -178,7 +178,7 @@ void _cat_per_sample_weights_list_out(
}
}

Tensor _cat_per_sample_weights_list(
static Tensor _cat_per_sample_weights_list(
const std::vector<Tensor>& per_sample_weights,
const std::vector<Tensor>& indices_list,
int64_t total_num,
Expand Down Expand Up @@ -375,7 +375,7 @@ void tbe_input_combine_with_length_cpu_out(
combined_per_sample_weights.resize_({0});
}

std::tuple<Tensor, Tensor, Tensor> tbe_input_combine_with_length_cpu(
static std::tuple<Tensor, Tensor, Tensor> tbe_input_combine_with_length_cpu(
const std::vector<Tensor>& indices_list,
const std::vector<Tensor>& lengths_list,
const std::vector<Tensor>& per_sample_weights) {
Expand Down Expand Up @@ -518,7 +518,7 @@ std::tuple<Tensor, Tensor, Tensor> padding_fused_tbe_input_combine_cpu(
/// @param lengths_list list of lengths.
/// @param per_sample_weights list of per_sample_weights
/// @return tuple of combined indices, lengths, and per_sample_weights
std::tuple<Tensor, Tensor, Tensor>
static std::tuple<Tensor, Tensor, Tensor>
padding_fused_tbe_input_combine_with_length_cpu(
const std::vector<Tensor>& indices_list,
const std::vector<Tensor>& lengths_list,
Expand Down
6 changes: 3 additions & 3 deletions fbgemm_gpu/src/input_combine_ops/input_combine_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ enum args_pos {
};

template <typename T>
uint64_t compute_num_uint64s(const uint64_t num_elements) {
static uint64_t compute_num_uint64s(const uint64_t num_elements) {
const uint64_t ratio = sizeof(uint64_t) / sizeof(T);
return (num_elements + ratio - 1) / ratio;
}

void offset_tbe_input_combine_with_length_args(
static void offset_tbe_input_combine_with_length_args(
uint64_t** indices_addrs,
uint64_t** lengths_addrs,
uint64_t** indices_offsets,
Expand All @@ -59,7 +59,7 @@ void offset_tbe_input_combine_with_length_args(
reinterpret_cast<uint32_t*>(base_addr + ptr_offsets[P_lengths_is_long]);
}

std::tuple<Tensor, Tensor, Tensor> tbe_input_combine_with_length_gpu(
static std::tuple<Tensor, Tensor, Tensor> tbe_input_combine_with_length_gpu(
const std::vector<Tensor>& indices_list,
const std::vector<Tensor>& lengths_list,
const std::vector<Tensor>& per_sample_weights) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ class JaggedSliceOp : public torch::autograd::Function<JaggedSliceOp> {
} // namespace

///@ingroup jagged-tensor-ops-cpu
Tensor jagged_to_padded_dense_forward_autograd(
static Tensor jagged_to_padded_dense_forward_autograd(
const Tensor& values,
const std::vector<Tensor>& offsets,
const c10::SymIntArrayRef max_lengths,
Expand Down Expand Up @@ -883,7 +883,7 @@ std::tuple<Tensor, std::vector<Tensor>> dense_to_jagged(
auto output = op.call(dense, offsets, total_L);
return {output, offsets};
}
Tensor dense_to_jagged_forward_autograd(
static Tensor dense_to_jagged_forward_autograd(
const Tensor& dense,
const std::vector<Tensor>& offsets,
std::optional<at::SymInt> total_L) {
Expand Down
Loading