diff --git a/.cursor/rules/bugsnag/fix-bugsnag-issues.mdc b/.cursor/rules/bugsnag/fix-bugsnag-issues.mdc new file mode 100644 index 00000000..630c982d --- /dev/null +++ b/.cursor/rules/bugsnag/fix-bugsnag-issues.mdc @@ -0,0 +1,72 @@ +--- +description: Describes how to handle a request to fix a bugsnag issue +globs: +alwaysApply: false +--- +# Bugsnag Ticket Workflow + +When a user provides a Bugsnag error URL or asks to fix a Bugsnag issue, follow this systematic approach: + +## 1. **Root Cause Analysis** + +### Error Investigation +- Use `mcp_bugsnag-mcp_list-error-events` to get recent events for the error +- Use `mcp_bugsnag-mcp_get-stacktrace` with `include_code: true` and `show_all_frames: true` to get the complete stacktrace +- Analyze the stacktrace to identify: + - The exact line and file where the error occurs + - The call stack that leads to the error + - The error message and context + +### Codebase Investigation +- Read the relevant files identified in the stacktrace +- Search for related code patterns or similar implementations +- Identify the data flow that leads to the problematic state +- Look for edge cases or missing null/undefined checks + +## 2. **Suggest Fixes (No Code Changes)** + +### Analysis Summary +- Provide a clear explanation of what's causing the error +- Identify the specific conditions that trigger the issue +- Explain the impact and severity of the error +- If you can't figure out what is going on, just say so and ask the user for more context + +### Proposed Solutions +- Present 1-3 potential fix approaches with pros/cons +- Suggest relevant tests to prevent regression +- Try to be tactical with your suggestions, avoid large refactors when possible + +### Implementation Recommendations +- Specify which files need to be modified +- Outline the exact changes needed (but don't make them yet) +- Mention any dependencies or related changes required +- Highlight potential breaking changes or compatibility concerns + +## 3. **User Confirmation & Implementation** + +### Get Approval +- Wait for user confirmation on the proposed approach +- Allow for discussion and refinement of the solution +- Clarify any ambiguous requirements or edge cases + +### Implementation & PR Creation +Once the user confirms the approach: +- Follow the below process: + - Making code changes + - Adding tests if needed + +--- + +**Example Workflow:** + +- User: "Fix this Bugsnag issue: https://app.bugsnag.com/" +- Assistant: + 1. Fetches error events and stacktrace from Bugsnag + 2. Analyzes the code to identify root cause + 3. Proposes specific fix approach with explanation + 4. Waits for user confirmation + 5. Making code changes and tests if needed + +--- + +**Note:** This workflow ensures thorough analysis before making changes, reducing the risk of incomplete fixes or introducing new issues while maintaining the systematic approach for PR creation. 
\ No newline at end of file
diff --git a/.cursor/rules/coding-workflow.mdc b/.cursor/rules/coding-workflow.mdc new file mode 100644 index 00000000..a5b8a37a --- /dev/null +++ b/.cursor/rules/coding-workflow.mdc @@ -0,0 +1,19 @@ +--- +description: Coding workflow Cursor must follow +globs: +alwaysApply: false +--- +# Coding workflow preferences
+- Focus on the areas of code relevant to the task +- Do not touch code that is unrelated to the task +- Avoid making major changes to the patterns and architecture of how a feature works once it has been shown to work well, unless explicitly instructed +- Always think about what other methods and areas of code might be affected by code changes +- Keep code simple and readable +- Write thorough tests for all functionality you write +
+Follow this sequential workflow: +1. Write or update existing code +2. Write incremental unit-tests to cover the code logic you wrote +3. Run the new unit-tests and make sure they pass +4. Verify everything passes by running the `make test` command +5. Ensure your unit-tests have good code coverage for the code you have written
diff --git a/.cursor/rules/github/pr-template-format.mdc b/.cursor/rules/github/pr-template-format.mdc new file mode 100644 index 00000000..bcdac6de --- /dev/null +++ b/.cursor/rules/github/pr-template-format.mdc @@ -0,0 +1,54 @@ +--- +description: Follow PR template format from .github/pull_request_template.md when creating pull request descriptions +globs: "**/*" +alwaysApply: false +--- +
+# GitHub Pull Request Template Format +
+When creating pull request descriptions, you **must** follow the format specified in `.github/pull_request_template.md` if it exists in the repository. +
+## Rule Requirements +
+1. **Check for Template**: Always check if `.github/pull_request_template.md` exists in the repository root before creating PR descriptions. +
+2. **Use Template Structure**: If the template exists: + - Follow the exact section structure defined in the template + - Include all required sections from the template + - Maintain the same heading levels and formatting style + - Preserve any placeholder text guidelines or instructions + - Fill in the template sections with relevant information for the specific PR +
+3. **Template Sections**: Common sections that should be preserved if present in the template include: + - **Summary/Description**: Brief overview of the changes + - **Changes Made**: Detailed list of modifications + - **Testing**: How the changes were tested + - **Type of Change**: Bug fix, feature, documentation, etc. + - **Checklist**: Action items or verification steps + - **Breaking Changes**: Any backward compatibility concerns + - **Related Issues**: Links to related issues or tickets +
+4. 
**Fallback Behavior**: If no template exists, create a well-structured PR description with: + - Clear summary of changes + - Bullet points for key modifications + - Testing information if applicable + - Any relevant context or notes +
+## Implementation Guidelines +
+- **Read Template First**: Use tools to read the `.github/pull_request_template.md` file content before generating PR descriptions +- **Preserve Formatting**: Maintain markdown formatting, comments, and structure from the template +- **Fill Appropriately**: Replace template placeholders with actual, relevant information +- **Be Comprehensive**: Ensure all template sections are addressed, even if briefly +- **Stay Consistent**: Use the same tone and style as indicated by the template +
+## Example Usage +
+When creating a PR: +1. Check for `.github/pull_request_template.md` +2. If found, read the template content +3. Generate PR description following the template structure +4. Fill in each section with relevant information +5. Ensure all required sections are included +
+This rule ensures consistency across all pull requests in repositories that have established PR templates, while providing a sensible fallback for repositories without templates. 
diff --git a/.cursor/rules/golang/coding-rules.mdc b/.cursor/rules/golang/coding-rules.mdc new file mode 100644 index 00000000..125cceb3 --- /dev/null +++ b/.cursor/rules/golang/coding-rules.mdc @@ -0,0 +1,27 @@ +--- +description: Rules to follow when writing code +globs: *.go +---
+You are a Staff Software Engineer and an expert in Golang, Protobuf, and gRPC. You write clean and properly documented code. You ensure the code you write works, and you always write corresponding unit-tests. +You are an expert in writing unit-tests, and specialise in updating existing unit-tests to cover missing use-cases. +
+# Coding pattern preferences
+- Always prefer simple solutions. +- Keep the codebase very clean and organized. +- Avoid duplication of code whenever possible, which means checking for other areas of the codebase that might already have similar code and functionality. +- Write code that takes into account the different environments: development, staging, and production. +- You are careful to only make changes that are requested or that you are confident are well understood and related to the change being requested. +- When fixing an issue or bug, do not introduce a new pattern or technology without first exhausting all options for the existing implementation. If you do end up doing this, make sure to remove the old implementation afterwards so we don't have duplicate logic. +- Avoid having files over 200-300 lines of code. Refactor at that point. +- Mocking data is only needed for tests; never mock data for dev or prod. +- Avoid writing scripts in files if possible, especially if the script is likely only to be run once. +- Never add stubbing or fake data patterns to code that affects the dev or prod environments. +- Never overwrite config *.yml (YAML) files without first asking and confirming. +- It is acceptable to say you do not know. +- Do not write your own mocks in testing. Follow this sequence: +1. Update the Makefile to generate the mock needed. 
+ Following the example for internal packages + `mockgen -source=internal/client/users_service.go -destination=internal/dao/mocks/mock_users_service.go -package=mockdao` + and for external packages follow this: + ` mockgen -destination=mocks/eth_client_mock.go -mock_names EthClient=MockEthClient -package=mocks github.cbhq.net/intl/rip7755-fulfiller/internal/client EthClientmockgen` +2. Update testing code to use the generated mock diff --git a/.cursor/rules/golang/english-and-comments-std.mdc b/.cursor/rules/golang/english-and-comments-std.mdc new file mode 100644 index 00000000..c35cf56f --- /dev/null +++ b/.cursor/rules/golang/english-and-comments-std.mdc @@ -0,0 +1,49 @@ +--- +description: Make sure to always add clear comments within the codebase +globs: *.go +alwaysApply: true +--- +# Standard: Language, Comments, and Documentation + +This rule defines the standards for language use and commenting within the codebase, prioritizing clarity and developer experience for engineers familiar with the project. + +**1. Language:** + +* **All** code artifacts, including variable names, function names, comments, documentation, commit messages, and generated rules, **must** be written in **simple, clear, friendly and idiomatic English**. + +**2. Comments:** + +* **Target Audience:** Assume the reader is an experienced developer familiar with Go and the general project context. +* **Prioritize Readability:** Code should be self-documenting whenever possible through clear naming and structure. +* **Avoid Redundant Comments:** Do **not** add comments that merely restate what the code clearly does. For example: + ```go + // Bad: Comment explains the obvious + // Increment count + count++ + + // Good: Comment starts with the function name + // GetUserByID finds a user by their ID + func GetUserByID(id string) (*User, error) { ... } + ``` +* **Focus on the "Why", Not Just the "What":** Prioritize comments that explain *why* a particular approach was taken, especially if it's non-obvious, involves trade-offs, or relates to external factors or historical context. + * Explain complex logic or algorithms briefly. + * Clarify the purpose of seemingly arbitrary values or constants. + * Document known limitations, potential issues, or future work (`TODO`, `FIXME`). + * Add comments when fixing subtle bugs to explain the reasoning. + ```go + // Good: Explains the rationale for a non-obvious choice + // Use FNV-1a hash for Redis hash tags to optimize for speed and key length, + // as cryptographic security is not required for slot assignment. + func hashUserIDForTag(userID string) string { ... } + + // Good: Explains a workaround or limitation + // TODO(GH-123): Refactor this when the upstream API supports batch requests. + for _, item := range items { ... } + ``` +* **Placement:** Place comments on the line *before* the code they refer to, or sometimes at the end of a line for very short clarifications. + +**3. Documentation (e.g., READMEs, Design Docs):** + +* Maintain clarity and conciseness. +* Keep documentation up-to-date with significant code changes. +* Use diagrams or examples where appropriate to illustrate complex concepts. 
diff --git a/.cursor/rules/golang/golang-naming-std.mdc b/.cursor/rules/golang/golang-naming-std.mdc new file mode 100644 index 00000000..25a1e963 --- /dev/null +++ b/.cursor/rules/golang/golang-naming-std.mdc @@ -0,0 +1,69 @@ +--- +description: Avoid package prefix redundancy when naming +globs: *.go +alwaysApply: false +--- +# Go Standard: Naming Conventions - Avoid Package Prefix Redundancy + +This rule outlines the standard Go practice for naming exported identifiers to avoid redundancy with the package name. + +**The Standard:** + +When naming exported identifiers (types, functions, variables, constants), **avoid repeating the package name** if the context provided by the package itself makes the identifier clear. + +**Rationale:** + +* **Readability:** Code that imports the package becomes cleaner and less verbose. For example, `store.New()` is more idiomatic and readable than `store.NewStore()`. +* **Conciseness:** Reduces unnecessary stuttering in code (e.g., `store.StoreType` vs. `store.Type`). +* **Idiomatic Go:** Follows the common practice seen in the Go standard library and many popular Go projects. + +**Examples:** + +```go +// --- Package: store --- + +// BAD: Repeats "store" +package store + +type StoreConfig struct { ... } +func NewStore(cfg StoreConfig) (*Store, error) { ... } +var DefaultStoreOptions StoreOptions + +// GOOD: Avoids repeating "store" +package store + +type Config struct { ... } // Type name is clear within package 'store' +func New(cfg Config) (*Store, error) { ... } // Function name 'New' is clear +var DefaultOptions Options // Variable name is clear + +type Store struct { ... } // OK: Identifier itself IS the package name conceptually +``` + +When importing and using the "good" example: + +```go +import "path/to/store" + +// ... +cfg := store.Config{ ... } +activityStore, err := store.New(cfg) +opts := store.DefaultOptions +var s store.Store +``` + +This reads much better than: + +```go +import "path/to/store" + +// ... +cfg := store.StoreConfig{ ... } +activityStore, err := store.NewStore(cfg) +opts := store.DefaultStoreOptions +var s store.Store +``` + +**Exceptions:** + +* It is acceptable if the identifier itself essentially *is* the package name (e.g., `package http; type Client`, `package store; type Store`). +* Sometimes repeating a part of the package name is necessary for clarity if the package has many distinct concepts. diff --git a/.cursor/rules/golang/golang-use-getters.mdc b/.cursor/rules/golang/golang-use-getters.mdc new file mode 100644 index 00000000..149f3802 --- /dev/null +++ b/.cursor/rules/golang/golang-use-getters.mdc @@ -0,0 +1,53 @@ +--- +description: Prefer getter methods over direct field access +globs: *.go +alwaysApply: false +--- +# Go Standard: Prefer Getter Methods (Especially for Protobuf) + +This rule encourages the use of getter methods over direct field access, particularly when working with structs generated from Protobuf definitions. + +**The Standard:** + +**Prefer using generated getter methods (e.g., `myProto.GetMyField()`) over direct field access (e.g., `myProto.MyField`) when such getters are available.** This is especially relevant for structs generated by the Protobuf compiler (`protoc-gen-go`). + +**Rationale:** + +* **Encapsulation and Nil Safety:** Getters often provide a layer of abstraction. For Protobuf messages, getters automatically handle `nil` checks for pointer fields (like optional message fields or fields within a `oneof`), returning a zero value instead of causing a panic. 
This significantly improves robustness. +* **Consistency:** Using getters consistently makes the code easier to read and maintain, aligning with common Go practices for Protobuf. +* **Future-Proofing:** Relying on the getter method decouples the calling code from the exact internal representation of the field. If the underlying field changes in a backward-compatible way (e.g., how a default value is handled), code using the getter is less likely to break. +* **Helper Logic:** Getters might potentially include minor logic (though less common in basic Protobuf getters beyond nil checks). + +**Example (Protobuf):** + +```protobuf +// -- Example.proto -- +message UserProfile { + optional string name = 1; + optional int32 age = 2; +} +``` + +```go +// -- Go code -- +import "path/to/gen/go/examplepb" + +func processProfile(profile *examplepb.UserProfile) { + // GOOD: Uses getter, safe even if profile or profile.Name is nil. + name := profile.GetName() + age := profile.GetAge() // Also handles potential nil receiver safely. + + // BAD: Direct access risks nil pointer dereference if profile is non-nil + // but profile.Name is nil (for optional fields). + // nameDirect := *profile.Name // PANICS if profile.Name is nil! + // ageDirect := *profile.Age // PANICS if profile.Age is nil! + + fmt.Printf("Name: %s, Age: %d\n", name, age) +} +``` + +**Exceptions:** + +* **No Getter Available:** If a struct field does not have a corresponding getter method, direct access is necessary. +* **Performance Critical Code:** In extremely rare, performance-critical sections where profiling has demonstrably shown the function call overhead of the getter to be a bottleneck, direct access *might* be considered cautiously. This should be well-documented and justified. +* **Setting Values:** This rule applies to *reading* values. Setting struct fields typically involves direct assignment. diff --git a/.cursor/rules/golang/grpcclient.mdc b/.cursor/rules/golang/grpcclient.mdc new file mode 100644 index 00000000..5dac8a63 --- /dev/null +++ b/.cursor/rules/golang/grpcclient.mdc @@ -0,0 +1,17 @@ +--- +description: Creating a new grpc connection should use csf's grpcclient, not golang/grpc +globs: *.go +alwaysApply: false +--- +Pull in the package from github.cbhq.net/engineering/csf/grpcclient + +Here is an example of how you should implement it +``` + manager := csf.New() + ctx := manager.ServiceContext() + conn, err := grpcclient.Dial( + ctx, + endpoint, + grpcclient.WithDialOpt(grpc.WithBlock()), + ) +``` \ No newline at end of file diff --git a/.cursor/rules/observability/logging-best-practices.mdc b/.cursor/rules/observability/logging-best-practices.mdc new file mode 100644 index 00000000..52e94b5a --- /dev/null +++ b/.cursor/rules/observability/logging-best-practices.mdc @@ -0,0 +1,255 @@ +--- +description: Rules to follow when logging +globs: ["*.go", "*.py"] +--- +## Rule +Ensure all logging statements are correct, useful, performant, and secure. Logs must accurately reflect the code's logic, provide sufficient context for debugging, avoid performance degradation, and never expose sensitive information. Use appropriate log levels and avoid logging sensitive data, large objects, or high-frequency debug information. + +Refer to [logging best practices](https://docs.cbhq.net/infra/observability/logging) for more details. + +## Scope +This rule ONLY APPLIES when ALL the following conditions are met: +1. Code contains logging statements (logger.debug, log.info, console.log, etc.) +2. 
The logging exhibits one or more of these problematic patterns (tagged with Defect Patterns and scenarios):
+ - **(SS-1) Logging Sensitive Data:** Exposing credentials, tokens, PII, or financial data (passwords, API keys, email addresses).
+ - **(SS-2) Logging Unsanitized Objects:** Logging entire request/response bodies or complex objects without removing sensitive fields.
+ - **(PF-1) Logging in High-Frequency Loops:** Placing log statements inside tight loops or on other hot paths like function entry/exit logging for every function, causing excessive volume.
+ - **(LV-1) Improper Log Level:** Using `DEBUG` or `TRACE` in non-development environments. Using `WARN` or `ERROR` for informational messages.
+ - **(SM-1) Unit/Metric Mismatch:** The unit in the log message (e.g., "ms") does not match the variable's actual unit (e.g., nanoseconds).
+ - **(SM-2) Message Contradicts Logic:** The log message misrepresents the code's state (e.g., logging "Success" in an `if err!= nil` block).
+ - **(IS-1) Insufficient Context:** An error log is not actionable because it omits the `error` variable or critical identifiers (e.g., `transaction_id`).
+ - **(VR-2) Placeholder-Argument Mismatch:** The number of placeholders in a format string (e.g., `%s`) does not match the number of provided variables.
+ - **(RD-3) Vague or Ambiguous Message:** The log message is too generic to be useful (e.g., "Done", "Error").
+
+| Defect Patterns | Scenario Name |
+| --- | --- |
+| **RD:** Readability Issues | RD-1: Complicated domain-specific terminology <br> RD-2: Non-standard language used <br> RD-3: Poorly formatted or unclear messages |
+| **VR:** Variable Issues | VR-1: Incorrect variable value logging <br> VR-2: Placeholder–value mismatch |
+| **LV:** Logging Level Issues | LV-1: Improper verbosity level |
+| **SM:** Semantics Inconsistent | SM-1: Wrong unit or metric label <br> SM-2: Message text does not match the code <br> SM-3: Misused variables in the message |
+| **SS:** Sensitive Information | SS-1: Credentials logged in plain text <br> SS-2: Dumping whole objects without scrubbing |
+| **IS:** Insufficient Information | IS-1: Insufficient information |
+| **PF:** Performance Issues | PF-1: Logging on hot path <br> PF-2: Costly string operations |
+
+## Out of Scope +This rule does NOT apply to: +- ERROR and WARN level logs for genuine issues +- INFO level logs for significant business events +- Structured logging that includes only relevant, non-sensitive fields. +- Logs that are properly sampled or rate-limited +- Test files or development-only code +- Logs that are conditionally enabled for debugging +
+## Good Examples +
+1. Appropriate log levels with structured data: +```go +logger.Info("User login successful", + "user_id", userID, + "login_method", "oauth", + "ip_address", clientIP, +) +``` +
+2. ERROR logs with context but no sensitive data: +```go +logger.Error("Payment processing failed", + "error_code", "INSUFFICIENT_FUNDS", + "transaction_id", txnID, + "user_id", userID, + "retry_count", retryCount, +) +``` +```go +user, err := db.FindUser(userID) +if err!= nil { + logger.Error("Failed to find user", + "error", err, + "user_id", userID, + "trace_id", traceID, + ) + return +} +``` + + +3. Conditional debug logging: +```go +if config.DebugEnabled { + logger.Debug("Processing order for user", "user_id", userID) +} +``` +
+4. Sampling high-frequency events: +```go +// Only log 1% of successful requests +if rand.Float32() < 0.01 { + logger.Info("Request processed successfully", + "endpoint", endpoint, + "duration_ms", duration.Milliseconds(), + ) +} +``` +
+5. Structured logging with relevant fields: +```go +logger.Info("Order processed", + "order_id", order.ID, + "user_tier", user.Tier, + "payment_method", "card", + "processing_time_ms", processingTime, +) +``` +
+6. Summarizing after a loop instead of logging within it: +```go +processedCount := 0 +for _, item := range items { + if err := process(item); err == nil { + processedCount++ + } +} +logger.Info("Batch processing complete", + "total_items", len(items), + "processed_count", processedCount, +) +``` +
+7. Logging specific, safe fields instead of a whole object: +```go +logger.Info("User profile updated", + "user_id", user.ID, + "user_tier", user.Tier, + "updated_fields", updatedFields, +) +``` +
+8. Ensuring unit consistency in logs +```go +//... operation... +duration := time.Since(startTime) +logger.Info("Request processed", + "duration_ms", duration.Milliseconds(), +) +``` +
+## Bad Examples +
+1. (LV-1) DEBUG logging in production code: +```go +// DEBUG logs create excessive volume +logger.Debug("Entering function processPayment") +logger.Debug("Validating payment request") +logger.Debug("Connecting to payment gateway") +logger.Debug("Exiting function processPayment") +``` +
+2. (PF-1) Logging inside loops: +```go +// Creates massive log volume +for _, item := range items { + logger.Info("Processing item", "item_id", item.ID) + // ... process item +} +``` +
+3. (SS-2) Logging entire objects or request bodies: +```go +// Logs potentially sensitive data and large objects +logger.Info("Received request", "request_body", fmt.Sprintf("%+v", requestBody)) +logger.Info("User object", "user", fmt.Sprintf("%+v", user)) +``` +```go +// BAD: Logs the entire user object, potentially exposing PII. +logger.Info("User object details", "user", fmt.Sprintf("%+v", user)) +``` +
+4. (SS-1) Logging sensitive information: +```go +// Exposes sensitive data in logs +logger.Info("Authentication attempt", + "email", user.Email, + "password", password, + "api_key", apiKey, + "token", authToken, + "auth_token", authToken, + "bearer_token", bearerToken, + "credit_card", creditCard, +) +``` +
+5. 
(PF-1) Function entry/exit logging everywhere: +```go +// Excessive noise for every function +func calculateTotal(items []Item) float64 { + logger.Debug("Entering calculateTotal") + total := 0.0 + for _, item := range items { + total += item.Price + } + logger.Debug("Exiting calculateTotal", "total", total) + return total +} +``` + +6. (SS-1) Logging URLs with sensitive information: +```go +// Exposes sensitive token in URL parameter +logger.Info("API request", "url", fmt.Sprintf("/api/users/%s/payments/%s?token=%s", userID, paymentID, token)) +``` + +7. (SM-2) Message contradicts code logic: +```go +// BAD: A success message is logged in the error-handling block. +err := processPayment(paymentDetails) +if err!= nil { + logger.Info("Payment processed successfully", "transaction_id", paymentDetails.ID) + //... handle error... +} +``` + +8. (IS-1) Insufficient context in an error log: +```go +// BAD: The log is not actionable because it omits the actual 'err' variable. +err := db.Save(user) +if err!= nil { + logger.Error("Failed to save user to database", "user_id", user.ID) +} +``` + +9. (SM-1) Unit mismatch in the log message: +```go +// BAD: The log text claims "ms" but the variable is in nanoseconds. +startTime := time.Now() +//... operation... +durationNanos := time.Since(startTime).Nanoseconds() +logger.Info(fmt.Sprintf("Task completed in %d ms", durationNanos)) +``` + +10. (VR-2) Placeholder-argument mismatch: +```go +// BAD: Two placeholders but only one argument provided. +logger.Error(fmt.Sprintf("Login for user %s from %s failed", userID)) +``` + + +## Evaluation Process +1. Identify all logging statements in the code +2. Check log levels - flag DEBUG logs that aren't conditionally enabled +3. Analyze control flow - for each logging statement, analyze its surrounding code to understand the logical context (e.g., is it inside an error-handling block like if err!= nil, a success path, or a loop?). Look for logging patterns inside loops or high-frequency operations +4. Extract semantic intent - use natural language understanding to determine the meaning of the static log message text (e.g., does it imply success, failure, or a status update?). +5. Correlate Logic and Intent - Compare the code's logical context with the message's semantic intent. Flag contradictions (e.g., a "success" message in a failure block - SM-2). +6. Analyze Log Content and Variables: + - Flag the logging of entire un-sanitized objects, request bodies, or other large data structures (SS-2). + - Scan for sensitive data patterns (passwords, keys, PII) in variable names, message text, and URL parameters (SS-1). + - URLs with parameters that might contain sensitive data + - For metric variables (e.g., duration), verify consistency between the variable's actual unit and the unit stated in the log message (SM-1). + - In error-handling blocks, verify that the `error` variable itself is being logged to prevent IS-1. + - Check for function entry/exit logging patterns +7. 
For each problematic pattern, suggest alternatives: + - Use appropriate log levels (ERROR, WARN, INFO) + - Sample high-frequency logs + - Log only relevant fields instead of entire objects + - Use structured logging with sanitized data + - Move detailed debugging to distributed tracing + - Rate-limit repetitive logs diff --git a/.cursor/rules/observability/metrics-best-practices.mdc b/.cursor/rules/observability/metrics-best-practices.mdc new file mode 100644 index 00000000..d2230570 --- /dev/null +++ b/.cursor/rules/observability/metrics-best-practices.mdc @@ -0,0 +1,136 @@ +--- +description: Rules to follow when emitting metrics +globs: ["*.go", "*.py"] +--- +## Rule +Do not use high-cardinality values as tags when emitting metrics. High-cardinality tags can significantly increase observability costs and impact system performance. Every tag that contains an ID must be flagged. + +Refer to [tagging strategy](https://docs.cbhq.net/infra/observability/metrics#tagging-strategy) for more details. + +## Scope +This rule applies to ANY metric emission found ANYWHERE in the code changes, regardless of the primary purpose of the PR. ALL metric emission calls (statsd) must be checked for violations. + +Violation happens when ALL the following conditions are met: +1. Code is emitting metrics to a monitoring system (Datadog, StatsD, etc.) +2. Tags or labels are being added to the metric +3. The tag values contain AT LEAST ONE high-cardinality. + +### High-cardinality Guidelines: +A tag value is considered high-cardinality if it falls into any of these categories: +- **Looks unique** – anything that sounds like it will generate a one-off value (e.g., id, uuid, token, session, generated_x) +- **Continuous values** – non-discrete numbers that can vary infinitely, like current_price, latitude, longitude, sensor readings, etc. +- **High entropy values** – anything random or cryptographic such as random, hash, sha1, md5, encrypted, signature + +### Common High-cardinality Examples: +- **Identifiers**: User IDs, customer IDs, account identifiers (`user_id`, `customer_id`, `account_id`) +- **Request tracking**: Request IDs, trace IDs, transaction IDs (`message_id`, `request_id`, `trace_id`) +- **Pattern-based keys**: Any tag key ending with `_id`, `_uuid`, `_token` +- **Time-based values**: Timestamps or time-based values +- **Network data**: URLs with parameters, dynamic paths, IP addresses, specific hostnames +- **Unique identifiers**: UUIDs, hashes, or other unique identifiers +- **Personal data**: Email addresses or user-specific data + +## Good Examples + +1. Using low-cardinality status codes: +```go +statsdClient.CountWithTags("api.requests_total", 1, map[string]string{ + "method": "GET", + "status": "200", + "endpoint": "/api/users", +}) +``` + +2. Building tags separately with low-cardinality values: +```go +tags := map[string]string{ + "method": "GET", + "status": "200", + "endpoint": "/api/users", +} +statsdClient.CountWithTags("api.requests_total", 1, tags) +``` + +3. Using bounded categorical values: +```go +statsdClient.CountWithTags("payment.processed", 1, map[string]string{ + "payment_method": "card", // Limited values: card, bank, crypto + "region": "us-east-1", // Limited AWS regions + "user_tier": "premium", // Limited tiers: basic, premium, enterprise +}) +``` + +4. 
Aggregating instead of individual IDs: +```go +// Instead of user_id tag, use aggregated user tier +statsdClient.GaugeWithTags("user.active_sessions", sessionCount, map[string]string{ + "user_tier": getUserTier(userID), // Low cardinality + "region": "us-west-2", +}) +``` + +## Bad Examples + +1. Using user IDs or message IDs as tags: +```go +// VIOLATION: user_id and message_id are high-cardinality IDs +statsd.IncrWithTags(statsdUSTEventProcessingFailure, map[string]string{ + "message_id": messageID, // VIOLATION: message_id is high-cardinality + "error_type": "nil_body", +}) +``` + +2. Building tags separately with ID values: +```go +// VIOLATION: Still problematic when tags are built separately +tags := map[string]string{ + "user_id": userID, // VIOLATION: user_id is high-cardinality + "message_id": messageID, // VIOLATION: message_id is high-cardinality +} +statsd.HistogramWithTags(statsdUSTRepositoryOperationTime, value, tags) +``` + +3. Using request IDs or trace IDs: +```go +// VIOLATION: Request IDs are unique for every request +statsdClient.TimingWithTags("api.response_time", duration, map[string]string{ + "request_id": "req_abc123def456", // VIOLATION: request_id is high-cardinality + "trace_id": "7d5d747be160e280504c099d984bcfe0", // VIOLATION: trace_id is high-cardinality +}) +``` + +4. Using timestamps as tags: +```go +// VIOLATION: Timestamps create unlimited unique values +stats.CountWithTags("queue.length", 1, map[string]string{ + "timestamp": time.Now().Format("2006-01-02T15:04:05"), // VIOLATION: timestamp is high-cardinality + "queue_name": "payments", +}) +``` + +5. Using IP addresses: +```go +// VIOLATION: IP addresses have very high cardinality +statsd.GaugeWithTags("connections.active", 1, map[string]string{ + "client_ip": "192.168.1.100", // VIOLATION: IP address is high-cardinality + "hostname": "server-abc123-def456", // VIOLATION: Dynamic hostnames are high-cardinality +}) +``` + +## Evaluation Process +1. Search the entire code change for ANY calls to `statsd` function that ends with `WithTags` (`statsd.*WithTags`) or tags map building, regardless of the PR's stated purpose +2. For each metric emission found, examine ALL tag keys in the map +3. If ANY tag key contains high-cardinality patterns, flag as violation. Examples for violations: + - `user_id`, `message_id`, `request_id`, `trace_id`, `session_id`, `customer_id` + - Any key ending with `_id`, `_uuid`, `_token` + - Timestamps, IP addresses, URLs with parameters +4. Do not skip metric calls because they seem unrelated to the main PR purpose +5. Check ANY function that contains these strings in its name: `CountWithTags`, `IncrWithTags`, `GaugeWithTags`, `HistogramWithTags`, `TimingWithTags`, `DistributionWithTags`. This includes methods from any package or client (e.g., `statsd.CountWithTags()`, `client.IncrWithTags()`, `metrics.GaugeWithTags()`, etc.) 
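+
+A minimal sketch of the key check described in step 3, for illustration only (this helper is not part of the rule or any codebase; the suffix and name lists are assumptions drawn from the examples above, and it assumes `strings` is imported):
+```go
+// isLikelyHighCardinalityKey flags tag keys that usually carry unbounded values,
+// mirroring the patterns listed in this rule: suffixes like _id, _uuid, _token,
+// plus a few known key names (timestamps, IPs, hostnames, emails).
+func isLikelyHighCardinalityKey(key string) bool {
+	for _, suffix := range []string{"_id", "_uuid", "_token"} {
+		if strings.HasSuffix(key, suffix) {
+			return true
+		}
+	}
+	known := map[string]bool{
+		"timestamp": true,
+		"client_ip": true,
+		"hostname":  true,
+		"email":     true,
+	}
+	return known[key]
+}
+```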
+ +**Example**: Even if a PR is titled "Add configuration options", still check ALL metric emissions like: +```go +statsd.IncrWithTags(metric, map[string]string{ + "message_id": messageID, // VIOLATION - Must be flagged + "error_type": "nil_body", +}) +``` diff --git a/.cursor/rules/python/api-design.mdc b/.cursor/rules/python/api-design.mdc new file mode 100644 index 00000000..58f975f1 --- /dev/null +++ b/.cursor/rules/python/api-design.mdc @@ -0,0 +1,28 @@ +# .cursor/rules/python/api-design.mdc +--- +description: API design principles for Python microservices +globs: ["*.py"] +alwaysApply: true +--- +# API Design Principles + +## REST API Design +- Use proper HTTP methods (GET, POST, PUT, DELETE) +- Implement proper status codes +- Use plural nouns for resource endpoints +- Version APIs in the URL (e.g., /v1/resources) +- Use query parameters for filtering and pagination + +## Request/Response +- Validate all input data +- Use Pydantic models for request/response schemas +- Include proper error responses +- Implement pagination for list endpoints +- Use consistent response formats + +## Security +- Implement proper authentication +- Use JWT for stateless authentication +- Implement rate limiting +- Validate and sanitize all inputs +- Use HTTPS only \ No newline at end of file diff --git a/.cursor/rules/python/dependency-management.mdc b/.cursor/rules/python/dependency-management.mdc new file mode 100644 index 00000000..a02d227f --- /dev/null +++ b/.cursor/rules/python/dependency-management.mdc @@ -0,0 +1,21 @@ +# .cursor/rules/python/dependency-management.mdc +--- +description: Package and dependency management guidelines +globs: ["pyproject.toml", "requirements.txt", "setup.py"] +alwaysApply: true +--- +# Dependency Management + +## Package Management +- Use Poetry for dependency management +- Pin all dependencies with exact versions +- Use separate dependency groups for dev and prod +- Regular security audits with safety +- Keep dependencies up to date + +## Virtual Environments +- Use virtual environments for all projects +- Document Python version requirements +- Use .env files for environment variables +- Keep production dependencies minimal +- Document all third-party integrations \ No newline at end of file diff --git a/.cursor/rules/python/distributed-computing.mdc b/.cursor/rules/python/distributed-computing.mdc new file mode 100644 index 00000000..0b727edd --- /dev/null +++ b/.cursor/rules/python/distributed-computing.mdc @@ -0,0 +1,25 @@ +# .cursor/rules/python/distributed-computing.mdc +--- +description: Guidelines for distributed computing with Ray +globs: ["*.py"] +alwaysApply: true +--- +# Distributed Computing Standards + +## Ray Framework Usage +- Use Ray for distributed task processing +- Implement proper actor patterns +- Use Ray's object store effectively +- Handle distributed errors properly + +## Scaling Patterns +- Implement proper auto-scaling +- Use resource management effectively +- Handle node failures gracefully +- Implement proper checkpointing + +## Performance +- Use Ray's performance monitoring +- Implement proper batching +- Use Ray's memory management +- Profile distributed operations \ No newline at end of file diff --git a/.cursor/rules/python/fastapi-standards.mdc b/.cursor/rules/python/fastapi-standards.mdc new file mode 100644 index 00000000..a83a666f --- /dev/null +++ b/.cursor/rules/python/fastapi-standards.mdc @@ -0,0 +1,41 @@ +# .cursor/rules/python/fastapi-standards.mdc +--- +description: FastAPI-specific development standards and patterns 
+globs: ["*.py"] +alwaysApply: true +--- +# FastAPI Development Standards + +## Project Structure +- Use the following directory structure: + ``` + app/ + ├── api/ + │ └── v1/ + │ └── endpoints/ + ├── core/ + │ ├── config.py + │ └── security.py + ├── models/ + ├── schemas/ + └── services/ + ``` + +## FastAPI Best Practices +- Use dependency injection for service dependencies +- Implement proper exception handlers +- Use background tasks for async operations +- Implement proper middleware chain +- Use FastAPI's built-in OpenAPI support + +## Performance Optimization +- Use async/await properly +- Implement caching strategies +- Use connection pooling for databases +- Implement proper background tasks + +## Security +- Use FastAPI's security dependencies +- Implement proper CORS policies +- Use rate limiting +- Implement proper authentication middleware \ No newline at end of file diff --git a/.cursor/rules/python/infrastructure.mdc b/.cursor/rules/python/infrastructure.mdc new file mode 100644 index 00000000..d9bd974d --- /dev/null +++ b/.cursor/rules/python/infrastructure.mdc @@ -0,0 +1,25 @@ +# .cursor/rules/python/infrastructure.mdc +--- +description: Infrastructure and deployment standards +globs: ["*.py", "Dockerfile", "*.yaml"] +alwaysApply: true +--- +# Infrastructure Standards + +## Containerization +- Use multi-stage Docker builds +- Implement proper health checks +- Use non-root users +- Follow container security best practices + +## Kubernetes +- Use proper resource requests/limits +- Implement proper probes +- Use proper service mesh integration +- Follow GitOps practices + +## CI/CD +- Implement proper testing stages +- Use proper security scanning +- Implement proper deployment strategies +- Use proper environment separation \ No newline at end of file diff --git a/.cursor/rules/python/microservices-architecture.mdc b/.cursor/rules/python/microservices-architecture.mdc new file mode 100644 index 00000000..e4352957 --- /dev/null +++ b/.cursor/rules/python/microservices-architecture.mdc @@ -0,0 +1,36 @@ +# .cursor/rules/python/microservices-architecture.mdc +--- +description: Guidelines for Python microservice architecture +globs: ["*.py"] +alwaysApply: true +--- +# Microservice Architecture Guidelines + +## Service Design +- Keep services small and focused on a single business capability +- Use FastAPI for HTTP APIs +- Use gRPC for internal service communication +- Implement health check endpoints +- Use OpenAPI/Swagger for API documentation +- Use proper service discovery +- Implement proper circuit breaking +- Use proper message queuing +- Implement proper retry policies + +## Configuration +- Use environment variables for service configuration +- Store secrets in a secure vault (e.g., HashiCorp Vault) +- Use configuration management for different environments +- Implement feature flags for gradual rollouts + +## Observability +- Implement structured logging using `structlog` +- Use OpenTelemetry for distributed tracing +- Implement metrics using Prometheus +- Set up proper monitoring dashboards + +## Resilience +- Implement circuit breakers for external calls +- Use retries with exponential backoff +- Implement rate limiting +- Handle partial failures gracefully \ No newline at end of file diff --git a/.cursor/rules/python/python-coding-rules.mdc b/.cursor/rules/python/python-coding-rules.mdc new file mode 100644 index 00000000..34e99a74 --- /dev/null +++ b/.cursor/rules/python/python-coding-rules.mdc @@ -0,0 +1,32 @@ +# .cursor/rules/python/python-coding-rules.mdc +--- 
+description: Python coding standards and best practices +globs: ["*.py"] +alwaysApply: true +--- +# Python Coding Standards + +## Code Style +- Follow PEP 8 style guide +- Use type hints for all function parameters and return values +- Maximum line length: 88 characters (Black formatter standard) +- Use descriptive variable names that reflect their purpose +- Use docstrings for all public modules, functions, classes, and methods + +## Project Structure +- Use src-layout pattern for all Python packages +- Separate business logic from API handlers +- Keep modules focused and single-responsibility +- Use absolute imports over relative imports + +## Error Handling +- Use custom exceptions for domain-specific errors +- Always include meaningful error messages +- Handle exceptions at appropriate levels +- Log errors with proper context + +## Best Practices +- Use dataclasses or Pydantic models for data structures +- Implement proper logging with structured data +- Use environment variables for configuration +- Follow the principle of least privilege \ No newline at end of file diff --git a/.cursor/rules/python/testing-standards.mdc b/.cursor/rules/python/testing-standards.mdc new file mode 100644 index 00000000..84638166 --- /dev/null +++ b/.cursor/rules/python/testing-standards.mdc @@ -0,0 +1,28 @@ +# .cursor/rules/python/testing-standards.mdc +--- +description: Testing requirements for Python microservices +globs: ["*_test.py", "test_*.py"] +alwaysApply: true +--- +# Testing Standards + +## Unit Tests +- Use pytest as the testing framework +- Maintain minimum 80% code coverage +- Mock external dependencies +- Use fixtures for test data +- Test both success and error cases + +## Integration Tests +- Test service integrations +- Use docker-compose for local testing +- Implement API tests using pytest-asyncio +- Test database interactions +- Verify message queue operations + +## Performance Tests +- Implement load tests using locust +- Test service scalability +- Measure response times +- Test rate limiting +- Verify resource usage \ No newline at end of file diff --git a/.cursor/rules/rego/rego-coding-patterns.mdc b/.cursor/rules/rego/rego-coding-patterns.mdc new file mode 100644 index 00000000..06695508 --- /dev/null +++ b/.cursor/rules/rego/rego-coding-patterns.mdc @@ -0,0 +1,94 @@ +--- +description: +globs: *.rego +alwaysApply: false +--- +# Common Rego Patterns and Idioms + +## Data Access Patterns +```rego +# Safe object access +value := object.get("key", "default") + +# Array iteration +result := [item | item := array[_]] + +# Set operations +combined := set_union(set1, set2) +``` + +## Control Flow +```rego +# Conditional logic +allow { + condition1 + condition2 +} else { + condition3 +} + +# Early returns +deny["reason"] { + not is_valid +} + +# Multiple conditions +allow { + count(violations) == 0 + is_authorized +} +``` + +## Common Functions +```rego +# Existence check +exists { + count(array) > 0 +} + +# Contains check +contains { + array[_] == value +} + +# All elements satisfy condition +all_valid { + count([x | x := array[_]; not is_valid(x)]) == 0 +} +``` + +## Testing Patterns +```rego +# Test case structure +test_allow_when_valid { + allow with input as {"valid": true} +} + +# Test helper functions +test_is_valid { + is_valid with input as {"value": "test"} +} +``` + +## Error Handling +```rego +# Error collection +errors[msg] { + not is_valid + msg := "Invalid input" +} + +# Multiple error messages +errors[msg] { + not is_authorized + msg := "Not authorized" +} +``` + +## 
Best Practices +1. Use helper functions for complex logic +2. Keep rules focused and single-purpose +3. Use meaningful variable names +4. Document complex logic with comments +5. Use consistent formatting +6. Break down complex conditions into smaller rules diff --git a/context/Base Account Pull Request Guidelines.md b/context/Base Account Pull Request Guidelines.md new file mode 100644 index 00000000..2f9e6e99 --- /dev/null +++ b/context/Base Account Pull Request Guidelines.md @@ -0,0 +1,172 @@ +# Base Account Pull Request Guidelines + +Status **Living** +Updated Jun 16, 2025 +Created Oct 20, 2024 + +[Overview](#overview) + +[Why](#why) + +[SLA](#sla) + +[Success Metrics](#success-metrics) + +[Guidelines](#guidelines) + +[Authoring](#authoring) + +[Reviewing](#reviewing) + +[Owning a Codebase](#owning-a-codebase) + +[Appendix](#appendix) + +[FAQ](#faq) + +[Resources](#resources) + +[Notes](#notes) + +# Overview {#overview} + +*“Honesty in small things is not a small thing.”* + +- *Robert C. Martin, Clean Code: A Handbook of Agile Software Craftsmanship* + +PRs are the lifeline of the team. It’s what allows us to ship value, decides our future in terms of maintenance cost, and has a high impact on our daily QOL. When code is well-maintained and untangled, velocity is sustained. + +This document lays out SLAs, guidelines, and how we think about pull requests as a world-class engineering team. + +## Why {#why} + +Having a quality pull request process allows us to build with sustained velocity and deliver improvements and features to our users. + +Pull requests allow us to: + +* **Hold and improve the bar on quality:** we can catch bugs and architectural code smells early, before these have reached QA or users +* **Build a championship team and mentor top talent**: by knowledge-sharing through clear descriptions and deep, thoughtful reviews, this will help our team become stronger +* **Stay customer focused with repeatable innovation:** by keeping the PRs tight and decoupled, this will allow us to consistently ship (or roll back) incremental improvements to our customers +* **Encourage ownership:** by having clear ownership around domains, this will motivate authors and reviewers to hold the quality high, allowing for the codebase to be malleable to fit the business needs while reducing incidents and bugs + +As engineers, pull requests are a key part of our day-to-day life, whether it’s authoring or reviewing them. By holding a high bar, it will also improve our daily quality of life. + +## SLA {#sla} + +| Name | SLA | Why | +| :---- | :---- | :---- | +| **PR Review** | Reviewed within half a working day | Pull requests are encouraged to be reviewed within half a working day. If they take longer than 1 working day, likely, something needs to be improved. | + +## Success Metrics {#success-metrics} + +| Metric | Why | +| :---- | :---- | +| **Time to PR Review** | Getting a review quickly and swiftly is one of the fastest ways to get unblocked. By having fast reviews, it powers the flywheel of shared context → quality code → maintainable codebase → iteration. | +| **Time from PR Open to Production** | Getting code merged is one thing. Getting it deployed is how it gets in front of customers. | +| **\# of Incidents** | By having a quality author and review process, errors should be caught during the pull request process. | +| **\# of QA regression bugs** | By having a quality author and review process, errors should be caught during the pull request process. 
| + +# Guidelines {#guidelines} + +## Authoring {#authoring} + +* **PRs should be tight.** This allows for teammates to review deeply and thoroughly. PRs should either make deep changes (creating two new functions) or a shallow change across a breadth of files (renaming a function). + + * PRs should be \<500 LOC. This is a guideline. There may be PRs which may be higher LOC (eg: if it’s auto-generated, boilerplate, or scaffolding). There also may be PRs which are 1-2 lines. + + * PRs should touch \<10 files. This is a guideline. If the PR is focused on renaming across a codebase, it could be 30+ files, but with minimal or no other business logic change. + +* **PRs should be well-described.** This allows for teammates to understand the problem and what the PR sets out to do. Importantly, it also allows for verification of the code and is well-documented for posterity. + +* **PRs should be well-tested.** Any change in code will impact flows. These flows should be well-tested. These can be manually tested and unit tested. Additionally, a QA regression test could be added. + +* **Consider who the reviewers are.** Reviewers are ideally owners of the codebase and/or those with deep knowledge of the domain. By reaching out and finding dedicated reviewers early in the process, it also gives a heads up to the reviewers, allowing them to schedule and prioritize reviews. + +* **Budget time for reviews.** Allow the reviewers time to comment and suggest. Even more importantly, allow there to be time to make improvements. Code is written once, but read much more times. + +* **Consider hosting a synchronous, live review.** Sometimes, it’s easier to communicate live with the reviewers to align (or disagree and commit). Please work the alignment back into the PR for posterity. + +* **Examples:** + + * [https://github.cbhq.net/wallet/wallet-mobile/pull/27738](https://github.cbhq.net/wallet/wallet-mobile/pull/27738) + * [https://github.cbhq.net/wallet/wallet-mobile/pull/26092](https://github.cbhq.net/wallet/wallet-mobile/pull/26092) + +## Reviewing {#reviewing} + +* **PRs should be reviewed within half a day.** This is one of the things worth prioritizing for teammates as it generates a flywheel to improve velocity and knowledge-sharing. At the same time, PRs should be relatively easy to review, given the description, self-review, and code quality. + +* **PRs should be reviewed in detail.** No rubber stamping. + +## Owning a Codebase {#owning-a-codebase} + +**Unit Test** +[Base Wallet Unit Test + Lint SLAs](https://docs.google.com/document/u/0/d/1Ai3UDVDR3Hq1P-smfaeuclxDMQYSJjwlAPFyERraJCI/edit) + +**Unit Test Thresholds** +The owners should decide on unit test thresholds. These thresholds should reflect the appropriate LOE and risk for the business given what code is responsible for. + +**Conventions** +The team should have consensus on conventions, or lack of. Ideally, conventions are automated or linted. Else, they should be documented. + +# Appendix {#appendix} + +## FAQ {#faq} + +**There is a tight timeline and it’s easier to get everything into 1 humongous PR. Can we make an exception?** +Short answer: yes, we can always make an exception. There is no hard and fast rule. If there are humongous PRs, I’d recommend having a retro on it to see what could have been done differently. + +**When should we, as authors, seek out reviewers?** +Short answer: as early as possible. This can differ based on the type of work. 
+ +If there is an artifact (PPS/TDD), the reviewers of the artifact likely should also be pull request reviewers. Transitively, reviewers of the pull requests should be reviewers of the artifact. + +## Resources {#resources} + +* [\[Code Red\] Bar Raiser Code Review Program](https://docs.google.com/document/d/1bzYI2gdnNZI9MqvHyTrD7aZRUfrakVedLRe2gw8b114/edit?tab=t.0#heading=h.ismr2neqrl10) +* [Wallet Pull Request Guild (PRG) Readout](https://docs.google.com/document/u/0/d/1nyE26o9DwQnTstJBrwMYgr6qdQPe71YrluzyiMOU89A/edit) + +## Notes {#notes} + +Oct 24, 2024 + +Questions + +* What are our actual problems with our current code and review process? + * Breaking ERC specs + * Break spec on eth request accounts for Pano + * Documentation may have solved this + * Additional parameter on connection + * Time to review is longer than ideal + * Observation: authors repost requesting reviews in Slack + * Hypotheses + * Lack of context + * Lack of ownership +* Does having people not labeled as “Bar Raiser” set the wrong culture? + * Do we want a culture where some people can absolve themselves of the responsibility to raise the bar? +* Regardless of bar-raisers, we could take it down one level. What do we care about? +* [Wallet Pull Request Guild (PRG) Readout](https://docs.google.com/document/u/0/d/1nyE26o9DwQnTstJBrwMYgr6qdQPe71YrluzyiMOU89A/edit) +* Less in-flight projects +* Implement code owners + +\> Bar Raisers for a given component or system are a subset of the broader owning team, typically consisting of more experienced engineers. For repositories or subdirectories with Bar Raisers, PRs must be either authored by a Bar Raiser or receive approval from one before merging. All existing review and merge rules still apply in addition to the Bar Raiser requirement. + +Domains + +* Transaction / Signing: [Cody Crozier](mailto:cody.crozier@coinbase.com), [Lukas Rosario](mailto:lukas.rosario@coinbase.com), [Arjun Dureja](mailto:arjun.dureja@coinbase.com) +* Sessions: [Spencer Stock](mailto:spencer.stock@coinbase.com), [Jake Feldman](mailto:jake.feldman@coinbase.com), [Felix Zhang](mailto:felix.zhang@coinbase.com) +* SDK: [Felix Zhang](mailto:felix.zhang@coinbase.com), [Jake Feldman](mailto:jake.feldman@coinbase.com), [Spencer Stock](mailto:spencer.stock@coinbase.com), [Conner Swenberg](mailto:conner.swenberg@coinbase.com) +* BE: [Sam Luo](mailto:sam.luo@coinbase.com), [Adam Hodges](mailto:adam.hodges@coinbase.com) +* Smart contracts: [Amie Corso](mailto:amie.corso@coinbase.com), [Conner Swenberg](mailto:conner.swenberg@coinbase.com) +* Infra: ? 
+ +Proposal + +* Opt-in ✅ +* Everyone should maintain code and hold a high bar (be a bar raiser) ✅ +* Further discussion + * What it means to be a PR reviewer + * What improvements we can make + * Faster PR reviews + * Higher quality +* Breaking ERC specs (retro this) +* Other ways we can raise the bar \ No newline at end of file diff --git a/context/Mini TDD - TIPS 4337 Bundler .docx b/context/Mini TDD - TIPS 4337 Bundler .docx new file mode 100644 index 00000000..e41bf4d5 Binary files /dev/null and b/context/Mini TDD - TIPS 4337 Bundler .docx differ diff --git a/crates/account-abstraction-core/core/src/lib.rs b/crates/account-abstraction-core/core/src/lib.rs index fe08aa70..b3aa2f76 100644 --- a/crates/account-abstraction-core/core/src/lib.rs +++ b/crates/account-abstraction-core/core/src/lib.rs @@ -3,3 +3,4 @@ pub mod entrypoints; pub mod types; pub use account_abstraction_service::{AccountAbstractionService, AccountAbstractionServiceImpl}; pub use types::{SendUserOperationResponse, VersionedUserOperation}; +pub mod mempool; diff --git a/crates/account-abstraction-core/core/src/mempool.rs b/crates/account-abstraction-core/core/src/mempool.rs new file mode 100644 index 00000000..3cc274f7 --- /dev/null +++ b/crates/account-abstraction-core/core/src/mempool.rs @@ -0,0 +1,403 @@ +use crate::types::{PoolOperation, UserOpHash}; +use std::collections::{BTreeSet, HashMap}; +use std::sync::Arc; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; + +pub struct PoolConfig { + minimum_max_fee_per_gas: u128, +} + +#[derive(Eq, PartialEq, Clone, Debug)] +pub struct OrderedPoolOperation { + pub pool_operation: PoolOperation, + pub submission_id: u64, + pub priority_order: u64, +} + +impl Ord for OrderedPoolOperation { + /// TODO: There can be invalid opperations, where base fee, + expected gas price + /// is greater that the maximum gas, in that case we don't include it in the mempool as such mempool changes. 
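+ /// Ordering note: operations with a higher `max_priority_fee_per_gas` sort first, so iterating
+ /// the `best` set yields the highest-paying operations first; ties fall back to `submission_id`,
+ /// with earlier submissions ranked ahead of later ones.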
fn cmp(&self, other: &Self) -> std::cmp::Ordering { + other + .pool_operation + .operation + .max_priority_fee_per_gas() + .cmp(&self.pool_operation.operation.max_priority_fee_per_gas()) + .then_with(|| self.submission_id.cmp(&other.submission_id)) + } +} +
+impl PartialOrd for OrderedPoolOperation { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} +
+impl OrderedPoolOperation { + pub fn create_from_pool_operation(operation: &PoolOperation, submission_id: u64) -> Self { + Self { + pool_operation: operation.clone(), + priority_order: submission_id, + submission_id, + } + } +} +
+pub trait Mempool { + fn add_operation( + &mut self, + operation: &PoolOperation, + ) -> Result<Option<OrderedPoolOperation>, anyhow::Error>; + fn get_top_operations(&self, n: usize) -> impl Iterator<Item = Arc<PoolOperation>>; + fn remove_operation( + &mut self, + operation_hash: &UserOpHash, + ) -> Result<Option<PoolOperation>, anyhow::Error>; +} +
+pub struct MempoolImpl { + config: PoolConfig, + best: BTreeSet<OrderedPoolOperation>, + hash_to_operation: HashMap<UserOpHash, OrderedPoolOperation>, + submission_id_counter: AtomicU64, +} +
+impl Mempool for MempoolImpl { + fn add_operation( + &mut self, + operation: &PoolOperation, + ) -> Result<Option<OrderedPoolOperation>, anyhow::Error> { + if operation.operation.max_fee_per_gas() < self.config.minimum_max_fee_per_gas { + return Err(anyhow::anyhow!( + "Gas price is below the minimum required PVG gas" + )); + } + let ordered_operation_result = self.handle_add_operation(operation)?; + Ok(ordered_operation_result) + } +
+ fn get_top_operations(&self, n: usize) -> impl Iterator<Item = Arc<PoolOperation>> { + self.best + .iter() + .take(n) + .map(|o| Arc::new(o.pool_operation.clone())) + } +
+ fn remove_operation( + &mut self, + operation_hash: &UserOpHash, + ) -> Result<Option<PoolOperation>, anyhow::Error> { + if let Some(ordered_operation) = self.hash_to_operation.remove(operation_hash) { + self.best.remove(&ordered_operation); + Ok(Some(ordered_operation.pool_operation)) + } else { + Ok(None) + } + } +} +
+impl MempoolImpl { + fn handle_add_operation( + &mut self, + operation: &PoolOperation, + ) -> Result<Option<OrderedPoolOperation>, anyhow::Error> { + if let Some(old_ordered_operation) = self.hash_to_operation.get(&operation.hash) { + if operation.should_replace(&old_ordered_operation.pool_operation) { + self.best.remove(old_ordered_operation); + self.hash_to_operation.remove(&operation.hash); + } else { + return Ok(None); + } + } +
+ let order = self.get_next_order_id(); + let ordered_operation = OrderedPoolOperation::create_from_pool_operation(operation, order); +
+ self.best.insert(ordered_operation.clone()); + self.hash_to_operation + .insert(operation.hash, ordered_operation.clone()); + Ok(Some(ordered_operation)) + } +
+ fn get_next_order_id(&self) -> u64 { + self.submission_id_counter.fetch_add(1, Ordering::SeqCst) + } +
+ pub fn new(config: PoolConfig) -> Self { + Self { + config, + best: BTreeSet::new(), + hash_to_operation: HashMap::new(), + submission_id_counter: AtomicU64::new(0), + } + } +} +
+#[cfg(test)] +mod tests { + use super::*; + use crate::types::VersionedUserOperation; + use alloy_primitives::{Address, FixedBytes, Uint}; + use alloy_rpc_types::erc4337; + fn create_test_user_operation(max_priority_fee_per_gas: u128) -> VersionedUserOperation { + VersionedUserOperation::UserOperation(erc4337::UserOperation { + sender: Address::ZERO, + nonce: Uint::from(0), + init_code: Default::default(), + call_data: Default::default(), + call_gas_limit: Uint::from(100000), + verification_gas_limit: Uint::from(100000), + pre_verification_gas: Uint::from(21000), + max_fee_per_gas: Uint::from(max_priority_fee_per_gas), + max_priority_fee_per_gas: 
Uint::from(max_priority_fee_per_gas), + paymaster_and_data: Default::default(), + signature: Default::default(), + }) + } + + fn create_pool_operation(max_priority_fee_per_gas: u128, hash: UserOpHash) -> PoolOperation { + PoolOperation { + operation: create_test_user_operation(max_priority_fee_per_gas), + hash, + } + } + + fn create_test_mempool(minimum_required_pvg_gas: u128) -> MempoolImpl { + MempoolImpl::new(PoolConfig { + minimum_max_fee_per_gas: minimum_required_pvg_gas, + }) + } + + // Tests successfully adding a valid operation to the mempool + #[test] + fn test_add_operation_success() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + let operation = create_pool_operation(2000, hash); + + let result = mempool.add_operation(&operation); + + assert!(result.is_ok()); + let ordered_op = result.unwrap(); + assert!(ordered_op.is_some()); + let ordered_op = ordered_op.unwrap(); + assert_eq!(ordered_op.pool_operation.hash, hash); + assert_eq!( + ordered_op.pool_operation.operation.max_fee_per_gas(), + Uint::from(2000) + ); + } + + // Tests adding an operation with a gas price below the minimum required PVG gas + #[test] + fn test_add_operation_below_minimum_gas() { + let mut mempool = create_test_mempool(2000); + let hash = FixedBytes::from([1u8; 32]); + let operation = create_pool_operation(1000, hash); + + let result = mempool.add_operation(&operation); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Gas price is below the minimum required PVG gas") + ); + } + + // Tests adding an operation with the same hash but higher gas price + #[test] + fn test_add_operation_duplicate_hash_higher_gas() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + + let operation1 = create_pool_operation(2000, hash); + let result1 = mempool.add_operation(&operation1); + assert!(result1.is_ok()); + assert!(result1.unwrap().is_some()); + + let operation2 = create_pool_operation(3000, hash); + let result2 = mempool.add_operation(&operation2); + assert!(result2.is_ok()); + assert!(result2.unwrap().is_some()); + } + + // Tests adding an operation with the same hash but lower gas price + #[test] + fn test_add_operation_duplicate_hash_lower_gas() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + + let operation1 = create_pool_operation(3000, hash); + let result1 = mempool.add_operation(&operation1); + assert!(result1.is_ok()); + assert!(result1.unwrap().is_some()); + + let operation2 = create_pool_operation(2000, hash); + let result2 = mempool.add_operation(&operation2); + assert!(result2.is_ok()); + assert!(result2.unwrap().is_none()); + } + + // Tests adding an operation with the same hash and equal gas price + #[test] + fn test_add_operation_duplicate_hash_equal_gas() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + + let operation1 = create_pool_operation(2000, hash); + let result1 = mempool.add_operation(&operation1); + assert!(result1.is_ok()); + assert!(result1.unwrap().is_some()); + + let operation2 = create_pool_operation(2000, hash); + let result2 = mempool.add_operation(&operation2); + assert!(result2.is_ok()); + assert!(result2.unwrap().is_none()); + } + + // Tests adding multiple operations with different hashes + #[test] + fn test_add_multiple_operations_with_different_hashes() { + let mut mempool = create_test_mempool(1000); + + let hash1 = FixedBytes::from([1u8; 32]); + let operation1 = 
create_pool_operation(2000, hash1); + let result1 = mempool.add_operation(&operation1); + assert!(result1.is_ok()); + assert!(result1.unwrap().is_some()); + + let hash2 = FixedBytes::from([2u8; 32]); + let operation2 = create_pool_operation(3000, hash2); + let result2 = mempool.add_operation(&operation2); + assert!(result2.is_ok()); + assert!(result2.unwrap().is_some()); + + let hash3 = FixedBytes::from([3u8; 32]); + let operation3 = create_pool_operation(1500, hash3); + let result3 = mempool.add_operation(&operation3); + assert!(result3.is_ok()); + assert!(result3.unwrap().is_some()); + + assert_eq!(mempool.hash_to_operation.len(), 3); + assert_eq!(mempool.best.len(), 3); + } + + // Tests removing an operation that is not in the mempool + #[test] + fn test_remove_operation_not_in_mempool() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + + let result = mempool.remove_operation(&hash); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); + } + + // Tests removing an operation that exists in the mempool + #[test] + fn test_remove_operation_exists() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + let operation = create_pool_operation(2000, hash); + + mempool.add_operation(&operation).unwrap(); + + let result = mempool.remove_operation(&hash); + assert!(result.is_ok()); + let removed = result.unwrap(); + assert!(removed.is_some()); + let removed_op = removed.unwrap(); + assert_eq!(removed_op.hash, hash); + assert_eq!(removed_op.operation.max_fee_per_gas(), Uint::from(2000)); + } + + // Tests removing an operation and checking the best operations + #[test] + fn test_remove_operation_and_check_best() { + let mut mempool = create_test_mempool(1000); + let hash = FixedBytes::from([1u8; 32]); + let operation = create_pool_operation(2000, hash); + + mempool.add_operation(&operation).unwrap(); + + let best_before: Vec<_> = mempool.get_top_operations(10).collect(); + assert_eq!(best_before.len(), 1); + assert_eq!(best_before[0].hash, hash); + + let result = mempool.remove_operation(&hash); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + + let best_after: Vec<_> = mempool.get_top_operations(10).collect(); + assert_eq!(best_after.len(), 0); + } + + // Tests getting the top operations with ordering + #[test] + fn test_get_top_operations_ordering() { + let mut mempool = create_test_mempool(1000); + + let hash1 = FixedBytes::from([1u8; 32]); + let operation1 = create_pool_operation(2000, hash1); + mempool.add_operation(&operation1).unwrap(); + + let hash2 = FixedBytes::from([2u8; 32]); + let operation2 = create_pool_operation(3000, hash2); + mempool.add_operation(&operation2).unwrap(); + + let hash3 = FixedBytes::from([3u8; 32]); + let operation3 = create_pool_operation(1500, hash3); + mempool.add_operation(&operation3).unwrap(); + + let best: Vec<_> = mempool.get_top_operations(10).collect(); + assert_eq!(best.len(), 3); + assert_eq!(best[0].operation.max_fee_per_gas(), Uint::from(3000)); + assert_eq!(best[1].operation.max_fee_per_gas(), Uint::from(2000)); + assert_eq!(best[2].operation.max_fee_per_gas(), Uint::from(1500)); + } + + // Tests getting the top operations with a limit + #[test] + fn test_get_top_operations_limit() { + let mut mempool = create_test_mempool(1000); + + let hash1 = FixedBytes::from([1u8; 32]); + let operation1 = create_pool_operation(2000, hash1); + mempool.add_operation(&operation1).unwrap(); + + let hash2 = FixedBytes::from([2u8; 32]); + let operation2 = 
create_pool_operation(3000, hash2); + mempool.add_operation(&operation2).unwrap(); + + let hash3 = FixedBytes::from([3u8; 32]); + let operation3 = create_pool_operation(1500, hash3); + mempool.add_operation(&operation3).unwrap(); + + let best: Vec<_> = mempool.get_top_operations(2).collect(); + assert_eq!(best.len(), 2); + assert_eq!(best[0].operation.max_fee_per_gas(), Uint::from(3000)); + assert_eq!(best[1].operation.max_fee_per_gas(), Uint::from(2000)); + } + + // Tests top operations tie-breaker with submission id + #[test] + fn test_get_top_operations_submission_id_tie_breaker() { + let mut mempool = create_test_mempool(1000); + + let hash1 = FixedBytes::from([1u8; 32]); + let operation1 = create_pool_operation(2000, hash1); + mempool.add_operation(&operation1).unwrap().unwrap(); + + let hash2 = FixedBytes::from([2u8; 32]); + let operation2 = create_pool_operation(2000, hash2); + mempool.add_operation(&operation2).unwrap().unwrap(); + + let best: Vec<_> = mempool.get_top_operations(2).collect(); + assert_eq!(best.len(), 2); + assert_eq!(best[0].hash, hash1); + assert_eq!(best[1].hash, hash2); + } +} diff --git a/crates/account-abstraction-core/core/src/types.rs b/crates/account-abstraction-core/core/src/types.rs index 03e3eb2d..04e3c9af 100644 --- a/crates/account-abstraction-core/core/src/types.rs +++ b/crates/account-abstraction-core/core/src/types.rs @@ -1,5 +1,5 @@ use crate::entrypoints::{v06, v07, version::EntryPointVersion}; -use alloy_primitives::{Address, B256, ChainId, U256}; +use alloy_primitives::{Address, B256, ChainId, FixedBytes, U256}; use alloy_rpc_types::erc4337; pub use alloy_rpc_types::erc4337::SendUserOperationResponse; use anyhow::Result; @@ -11,6 +11,23 @@ pub enum VersionedUserOperation { UserOperation(erc4337::UserOperation), PackedUserOperation(erc4337::PackedUserOperation), } + +impl VersionedUserOperation { + pub fn max_fee_per_gas(&self) -> U256 { + match self { + VersionedUserOperation::UserOperation(op) => op.max_fee_per_gas, + VersionedUserOperation::PackedUserOperation(op) => op.max_fee_per_gas, + } + } + + pub fn max_priority_fee_per_gas(&self) -> U256 { + match self { + VersionedUserOperation::UserOperation(op) => op.max_priority_fee_per_gas, + VersionedUserOperation::PackedUserOperation(op) => op.max_priority_fee_per_gas, + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct UserOperationRequest { pub user_operation: VersionedUserOperation, @@ -107,6 +124,20 @@ pub struct AggregatorInfo { pub stake_info: EntityStakeInfo, } +pub type UserOpHash = FixedBytes<32>; + +#[derive(Eq, PartialEq, Clone, Debug)] +pub struct PoolOperation { + pub operation: VersionedUserOperation, + pub hash: UserOpHash, +} + +impl PoolOperation { + pub fn should_replace(&self, other: &PoolOperation) -> bool { + self.operation.max_fee_per_gas() > other.operation.max_fee_per_gas() + } +} + // Tests #[cfg(test)] mod tests { diff --git a/crates/audit/src/reader.rs b/crates/audit/src/reader.rs index f58265d2..7e4de6a5 100644 --- a/crates/audit/src/reader.rs +++ b/crates/audit/src/reader.rs @@ -1,4 +1,4 @@ -use crate::types::BundleEvent; +use crate::types::{BundleEvent, UserOpEvent}; use anyhow::Result; use async_trait::async_trait; use rdkafka::{ @@ -129,3 +129,98 @@ impl KafkaAuditLogReader { &self.topic } } + +#[derive(Debug, Clone)] +pub struct UserOpEventWrapper { + pub key: String, + pub event: UserOpEvent, + pub timestamp: i64, +} + +#[async_trait] +pub trait UserOpEventReader { + async fn read_event(&mut self) -> Result<UserOpEventWrapper>; + async fn 
commit(&mut self) -> Result<()>; +} + +pub struct KafkaUserOpAuditLogReader { + consumer: StreamConsumer, + topic: String, + last_message_offset: Option<i64>, + last_message_partition: Option<i32>, +} + +impl KafkaUserOpAuditLogReader { + pub fn new(consumer: StreamConsumer, topic: String) -> Result<Self> { + consumer.subscribe(&[&topic])?; + Ok(Self { + consumer, + topic, + last_message_offset: None, + last_message_partition: None, + }) + } +} + +#[async_trait] +impl UserOpEventReader for KafkaUserOpAuditLogReader { + async fn read_event(&mut self) -> Result<UserOpEventWrapper> { + match self.consumer.recv().await { + Ok(message) => { + let payload = message + .payload() + .ok_or_else(|| anyhow::anyhow!("Message has no payload"))?; + + let timestamp = match message.timestamp() { + Timestamp::CreateTime(millis) => millis, + Timestamp::LogAppendTime(millis) => millis, + Timestamp::NotAvailable => SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as i64, + }; + + let event: UserOpEvent = serde_json::from_slice(payload)?; + + debug!( + user_op_hash = %event.user_op_hash(), + timestamp = timestamp, + offset = message.offset(), + partition = message.partition(), + "Received UserOp event" + ); + + self.last_message_offset = Some(message.offset()); + self.last_message_partition = Some(message.partition()); + + let key = message + .key() + .map(|k| String::from_utf8_lossy(k).to_string()) + .ok_or_else(|| anyhow::anyhow!("Message missing required key"))?; + + Ok(UserOpEventWrapper { + key, + event, + timestamp, + }) + } + Err(e) => { + error!(error = %e, "Error receiving UserOp message from Kafka"); + sleep(Duration::from_secs(1)).await; + Err(e.into()) + } + } + } + + async fn commit(&mut self) -> Result<()> { + if let (Some(offset), Some(partition)) = + (self.last_message_offset, self.last_message_partition) + { + let mut tpl = TopicPartitionList::new(); + tpl.add_partition_offset(&self.topic, partition, rdkafka::Offset::Offset(offset + 1))?; + self.consumer + .commit(&tpl, rdkafka::consumer::CommitMode::Async)?; + } + Ok(()) + } +} diff --git a/crates/audit/tests/integration_tests.rs b/crates/audit/tests/integration_tests.rs index bb79d24d..25f35d9a 100644 --- a/crates/audit/tests/integration_tests.rs +++ b/crates/audit/tests/integration_tests.rs @@ -1,9 +1,13 @@ +use alloy_primitives::{Address, B256, U256}; use std::time::Duration; use tips_audit::{ - KafkaAuditArchiver, KafkaAuditLogReader, - publisher::{BundleEventPublisher, KafkaBundleEventPublisher}, + KafkaAuditArchiver, KafkaAuditLogReader, KafkaUserOpAuditLogReader, UserOpEventReader, + publisher::{ + BundleEventPublisher, KafkaBundleEventPublisher, KafkaUserOpEventPublisher, + UserOpEventPublisher, + }, storage::{BundleEventS3Reader, S3EventReaderWriter}, - types::{BundleEvent, DropReason}, + types::{BundleEvent, DropReason, UserOpEvent}, }; use tips_core::test_utils::create_bundle_from_txn_data; use uuid::Uuid; @@ -71,3 +75,50 @@ async fn test_kafka_publisher_s3_archiver_integration() Ok(()) } + +#[tokio::test] +async fn test_userop_kafka_publisher_reader_integration() +-> Result<(), Box<dyn std::error::Error>> { + let harness = TestHarness::new().await?; + let topic = "test-userop-events"; + + let test_user_op_hash = B256::from_slice(&[1u8; 32]); + let test_sender = Address::from_slice(&[2u8; 20]); + let test_entry_point = Address::from_slice(&[3u8; 20]); + let test_nonce = U256::from(42); + + let test_event = UserOpEvent::AddedToMempool { + user_op_hash: test_user_op_hash, + sender: test_sender, + entry_point: test_entry_point, + nonce: test_nonce, + }; + 
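+ // End-to-end flow: publish the event to the Kafka topic, read it back through the reader, assert on the decoded fields, then commit the consumer offset.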
+ let publisher = KafkaUserOpEventPublisher::new(harness.kafka_producer, topic.to_string()); + publisher.publish(test_event.clone()).await?; + + let mut reader = KafkaUserOpAuditLogReader::new(harness.kafka_consumer, topic.to_string())?; + + let received = tokio::time::timeout(Duration::from_secs(10), reader.read_event()).await??; + + assert_eq!(received.event.user_op_hash(), test_user_op_hash); + + match received.event { + UserOpEvent::AddedToMempool { + user_op_hash, + sender, + entry_point, + nonce, + } => { + assert_eq!(user_op_hash, test_user_op_hash); + assert_eq!(sender, test_sender); + assert_eq!(entry_point, test_entry_point); + assert_eq!(nonce, test_nonce); + } + _ => panic!("Expected AddedToMempool event"), + } + + reader.commit().await?; + + Ok(()) +}
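A rough usage sketch of how the new mempool could sit inside a bundling loop, assuming validated `PoolOperation`s arrive from a validation layer; the crate path, the `fill_bundle` helper, and the bundle size of 4 are illustrative and not part of the diff:

```rust
use account_abstraction_core::mempool::{Mempool, MempoolImpl};
use account_abstraction_core::types::PoolOperation;

// Hypothetical helper: insert freshly validated operations, then pick the
// best-paying ones for the next bundle and drop them from the pool.
fn fill_bundle(mempool: &mut MempoolImpl, validated_ops: &[PoolOperation]) -> Vec<PoolOperation> {
    for op in validated_ops {
        // An operation with a known hash is only replaced when it pays a
        // higher max_fee_per_gas; below-minimum operations are rejected.
        if let Err(err) = mempool.add_operation(op) {
            eprintln!("rejected {}: {err}", op.hash);
        }
    }

    // Highest max_priority_fee_per_gas first, earlier submissions break ties.
    let bundle: Vec<PoolOperation> = mempool
        .get_top_operations(4)
        .map(|op| op.as_ref().clone())
        .collect();

    // Once the bundle is submitted, the included operations leave the pool.
    for op in &bundle {
        let _ = mempool.remove_operation(&op.hash);
    }
    bundle
}
```

The `BTreeSet` plus `HashMap` pair keeps fee ordering and hash lookup in sync, which is why `remove_operation` has to touch both structures.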