From c355bbc4c2558d3b965d35b21ce1d98203c78c44 Mon Sep 17 00:00:00 2001 From: Suryasai Turaga Date: Sun, 7 Dec 2025 16:14:32 -0600 Subject: [PATCH] Add oauth_auto_token_rotation subproject MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new subproject for automatic OAuth token rotation for Databricks PostgreSQL (Lakebase) connections. Features: - Automatic token rotation every 50 minutes (before 60-min expiry) - Zero downtime with atomic .pgpass file updates - Dual authentication: OAuth M2M (production) and CLI (development) - Background service support: macOS LaunchAgent / Linux systemd - Comprehensive logging with rotation - Cross-platform support (macOS, Linux) This tool solves a critical automation problem where Databricks OAuth tokens expire after 60 minutes, eliminating manual token regeneration. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CODEOWNERS | 1 + oauth_auto_token_rotation/README.md | 356 +++++++++++++++ oauth_auto_token_rotation/__init__.py | 14 + oauth_auto_token_rotation/cli.py | 134 ++++++ oauth_auto_token_rotation/install.py | 293 +++++++++++++ oauth_auto_token_rotation/requirements.txt | 2 + oauth_auto_token_rotation/rotator.py | 405 ++++++++++++++++++ .../templates/launchd.plist.template | 42 ++ 8 files changed, 1247 insertions(+) create mode 100644 oauth_auto_token_rotation/README.md create mode 100644 oauth_auto_token_rotation/__init__.py create mode 100644 oauth_auto_token_rotation/cli.py create mode 100644 oauth_auto_token_rotation/install.py create mode 100644 oauth_auto_token_rotation/requirements.txt create mode 100644 oauth_auto_token_rotation/rotator.py create mode 100644 oauth_auto_token_rotation/templates/launchd.plist.template diff --git a/CODEOWNERS b/CODEOWNERS index 5e3a2b92..502e6484 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -16,6 +16,7 @@ ip_access_list_analyzer @alexott judge-builder @smoorjani @alkispoly-db 
ka-chat-bot @taiga-db metascan @nfx @alexott +oauth_auto_token_rotation @suryasai87 runtime-packages @nfx @alexott sql_migration_copilot @robertwhiffin tacklebox @Jonathan-Choi diff --git a/oauth_auto_token_rotation/README.md b/oauth_auto_token_rotation/README.md new file mode 100644 index 00000000..48be5ad1 --- /dev/null +++ b/oauth_auto_token_rotation/README.md @@ -0,0 +1,356 @@ +--- +title: "OAuth Auto Token Rotation for Databricks PostgreSQL" +language: python +author: "Surya Sai Turaga" +date: 2024-11-22 + +tags: +- security +- oauth +- postgresql +- lakebase +- automation +- script +- installable +--- + +# OAuth Auto Token Rotation for Databricks PostgreSQL (Lakebase) + +Automatic OAuth token rotation for Databricks PostgreSQL (Lakebase) connections. Eliminates the need for manual token updates by running as a background service that automatically refreshes OAuth tokens every 50 minutes and updates your `.pgpass` file. + +## Features + +- **Automatic Token Rotation** - Refreshes OAuth tokens every 50 minutes (before 60-minute expiry) +- **Zero Downtime** - Atomic `.pgpass` file updates prevent connection interruptions +- **Dual Authentication** - Supports both OAuth M2M (production) and CLI (development) +- **Background Service** - Runs as macOS LaunchAgent or Linux systemd service +- **Comprehensive Logging** - Rotating logs with detailed operation tracking +- **Easy Installation** - Simple `pip install` and one-command setup +- **Cross-Platform** - Works on macOS and Linux + +## Installation + +You need to have Python 3.8+ installed. 
Install the package: + +```bash +pip install git+https://github.com/suryasai87/oauth_auto_token_rotation.git +``` + +Or install via Databricks Labs sandbox: + +```sh +databricks labs install sandbox +``` + +## Quick Start + +### Configuration + +Set your Databricks workspace and PostgreSQL connection details: + +```bash +export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com" +export DATABRICKS_PG_HOST="instance-xyz.database.cloud.databricks.com" +export DATABRICKS_PG_USERNAME="your-email@company.com" +``` + +### Test It + +Run a test rotation to verify everything works: + +```bash +databricks-oauth-rotator --once +``` + +### Install as Background Service + +Install and start the automatic rotation service: + +```bash +databricks-oauth-install \ + --workspace-url https://your-workspace.cloud.databricks.com \ + --pg-host instance-xyz.database.cloud.databricks.com \ + --pg-username your-email@company.com +``` + +That's it! The service will now automatically rotate your OAuth tokens every 50 minutes. + +## How It Works + +``` ++-------------------------------------+ +| Background Service (every 50m) | ++------------------+------------------+ + | + v ++--------------------------------------+ +| 1. Get fresh OAuth token | +| - Try OAuth M2M first | +| - Fallback to Databricks CLI | ++------------------+-------------------+ + | + v ++--------------------------------------+ +| 2. Verify token validity (60 min) | ++------------------+-------------------+ + | + v ++--------------------------------------+ +| 3. Update ~/.pgpass atomically | ++------------------+-------------------+ + | + v ++--------------------------------------+ +| 4. 
Log success and sleep | +--------------------------------------+ +``` + +## Authentication Methods + +### Option 1: Databricks CLI (Recommended for Development) + +Easiest for personal use - uses browser-based OAuth: + +```bash +brew tap databricks/tap && brew install databricks # current Databricks CLI; the legacy PyPI "databricks-cli" package has no "auth login" command +databricks auth login --host https://your-workspace.cloud.databricks.com +``` + +The rotator will automatically use your CLI credentials. + +### Option 2: OAuth M2M (Recommended for Production) + +Best for automation and production use: + +1. Create a service principal in Databricks +2. Generate OAuth secret +3. Set environment variables: + +```bash +export DATABRICKS_CLIENT_ID="your-service-principal-id" +export DATABRICKS_CLIENT_SECRET="your-oauth-secret" +``` + +## Usage + +### Command-Line Interface + +```bash +# Run once (test mode) +databricks-oauth-rotator --once + +# Run as daemon with custom interval +databricks-oauth-rotator --interval 45 + +# Specify all parameters explicitly +databricks-oauth-rotator \ + --workspace-url https://workspace.cloud.databricks.com \ + --pg-host instance-xyz.database.cloud.databricks.com \ + --pg-username user@company.com \ + --pg-port 5432 \ + --pg-database databricks_postgres \ + --interval 50 +``` + +### Service Management + +```bash +# Install service +databricks-oauth-install + +# Check service status +databricks-oauth-status + +# Restart service +databricks-oauth-restart + +# Uninstall service +databricks-oauth-uninstall +``` + +### Python API + +Use the rotator programmatically in your Python code: + +```python +from databricks_oauth_rotator import DatabricksOAuthRotator + +# Create rotator instance +rotator = DatabricksOAuthRotator( + workspace_url="https://workspace.cloud.databricks.com", + pg_host="instance-xyz.database.cloud.databricks.com", + pg_username="user@company.com", + rotation_interval=50 # minutes +) + +# Run once +rotator.run_once() + +# Or run as daemon +rotator.run_daemon() +``` + +## Configuration + +### Environment Variables + +| Variable | 
Description | Required | +|----------|-------------|----------| +| `DATABRICKS_HOST` | Workspace URL | Yes | +| `DATABRICKS_PG_HOST` | PostgreSQL hostname | Yes | +| `DATABRICKS_PG_USERNAME` | PostgreSQL username | Yes | +| `DATABRICKS_CLIENT_ID` | OAuth client ID (M2M) | No | +| `DATABRICKS_CLIENT_SECRET` | OAuth client secret (M2M) | No | + +### Command-Line Arguments + +``` +--workspace-url URL Databricks workspace URL +--pg-host HOST PostgreSQL hostname +--pg-port PORT PostgreSQL port (default: 5432) +--pg-database DB Database name (default: databricks_postgres) +--pg-username USER PostgreSQL username +--pgpass-file PATH Path to .pgpass file (default: ~/.pgpass) +--log-file PATH Log file path (default: ~/.databricks_oauth_rotator.log) +--interval MINUTES Rotation interval (default: 50) +--once Run once and exit (test mode) +``` + +## Monitoring + +### View Logs + +```bash +# Follow logs in real-time +tail -f ~/.databricks_oauth_rotator.log + +# Check recent activity +tail -50 ~/.databricks_oauth_rotator.log + +# Search for errors +grep ERROR ~/.databricks_oauth_rotator.log +``` + +### Log Format + +``` +2025-01-22 14:30:00 - INFO - Starting OAuth token rotation cycle +2025-01-22 14:30:01 - INFO - Successfully obtained token via OAuth M2M +2025-01-22 14:30:01 - INFO - New token details: +2025-01-22 14:30:01 - INFO - - Subject: user@company.com +2025-01-22 14:30:01 - INFO - - Expires at: 2025-01-22T15:30:01 +2025-01-22 14:30:01 - INFO - - Valid for: 60 minutes +2025-01-22 14:30:01 - INFO - Successfully updated /Users/user/.pgpass +2025-01-22 14:30:01 - INFO - Token rotation completed successfully +``` + +## Troubleshooting + +### Service Not Starting + +Check the error logs: + +```bash +cat ~/.databricks_oauth_rotator_stderr.log +``` + +Common issues: +- Missing environment variables +- Python dependencies not installed +- Authentication not configured + +### Token Rotation Failing + +1. 
**Verify authentication:** + ```bash + # For OAuth M2M + echo $DATABRICKS_CLIENT_ID + echo $DATABRICKS_CLIENT_SECRET + + # For CLI + databricks auth login --host https://your-workspace.cloud.databricks.com + ``` + +2. **Test manually:** + ```bash + databricks-oauth-rotator --once + ``` + +3. **Check logs:** + ```bash + tail -100 ~/.databricks_oauth_rotator.log + ``` + +### .pgpass Not Updating + +1. **Check file permissions:** + ```bash + ls -la ~/.pgpass + # Should be: -rw------- (0600) + ``` + +2. **Verify service is running:** + ```bash + databricks-oauth-status + ``` + +## Architecture + +### Components + +- **`rotator.py`** - Core rotation logic +- **`cli.py`** - Command-line interface +- **`install.py`** - Service installation and management +- **`templates/`** - LaunchAgent/systemd templates + +### Authentication Flow + +1. **OAuth M2M Flow** (Production): + ``` + POST {workspace}/oidc/v1/token + Authorization: Basic base64({client_id}:{client_secret}) + Body: grant_type=client_credentials&scope=all-apis + -> Returns: access_token (valid 60 minutes) + ``` + +2. **Databricks CLI Flow** (Development): + ``` + databricks auth token --host {workspace} + -> Uses stored refresh token + -> Returns: access_token (valid 60 minutes) + ``` + +### File Updates + +The `.pgpass` file is updated atomically to prevent corruption: + +1. Write new token to temporary file +2. Set permissions to 0600 (owner read/write only) +3. 
Atomic rename to `.pgpass` + +## Security + +- **Short-lived tokens:** Access tokens expire after 60 minutes +- **Proactive rotation:** Tokens rotated at 50 minutes (10-minute safety margin) +- **Secure storage:** `.pgpass` file has restricted permissions (0600) +- **Atomic updates:** Prevents file corruption during updates +- **No credentials in code:** Uses environment variables and OAuth flows + +## Platform Support + +| Platform | Service Type | Status | +|----------|-------------|--------| +| macOS | LaunchAgent | Supported | +| Linux | systemd | Supported | +| Windows | - | Not yet supported | + +## References + +- [Databricks OAuth M2M Documentation](https://docs.databricks.com/en/dev-tools/auth/oauth-m2m.html) +- [Databricks OAuth U2M Documentation](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html) +- [Databricks CLI Authentication](https://docs.databricks.com/en/dev-tools/cli/authentication.html) +- [PostgreSQL .pgpass Documentation](https://www.postgresql.org/docs/current/libpq-pgpass.html) + +## License + +This project is licensed under the Apache License 2.0. diff --git a/oauth_auto_token_rotation/__init__.py b/oauth_auto_token_rotation/__init__.py new file mode 100644 index 00000000..84b38743 --- /dev/null +++ b/oauth_auto_token_rotation/__init__.py @@ -0,0 +1,14 @@ +""" +Databricks OAuth Auto Token Rotation + +Automatic OAuth token rotation for Databricks PostgreSQL (Lakebase) and other services. +Eliminates the need for manual token updates by running as a background service. 
+""" + +__version__ = "1.0.0" +__author__ = "Databricks Community" +__license__ = "Apache-2.0" + +from .rotator import DatabricksOAuthRotator + +__all__ = ["DatabricksOAuthRotator"] diff --git a/oauth_auto_token_rotation/cli.py b/oauth_auto_token_rotation/cli.py new file mode 100644 index 00000000..5ecbab6d --- /dev/null +++ b/oauth_auto_token_rotation/cli.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +""" +Command-line interface for Databricks OAuth Token Rotator +""" + +import sys +import argparse +from .rotator import DatabricksOAuthRotator + + +def main(): + """Main CLI entry point""" + parser = argparse.ArgumentParser( + description='Databricks PostgreSQL OAuth Token Auto-Rotation Service', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run once (test mode) + databricks-oauth-rotator --once + + # Run as daemon with custom interval + databricks-oauth-rotator --interval 45 + + # Specify all parameters + databricks-oauth-rotator \\ + --workspace-url https://workspace.cloud.databricks.com \\ + --pg-host instance-xyz.database.cloud.databricks.com \\ + --pg-username user@company.com \\ + --once + +Environment Variables: + DATABRICKS_HOST Workspace URL + DATABRICKS_CLIENT_ID OAuth client ID (for M2M) + DATABRICKS_CLIENT_SECRET OAuth client secret (for M2M) + DATABRICKS_PG_HOST PostgreSQL hostname + DATABRICKS_PG_USERNAME PostgreSQL username + """ + ) + + # Mode arguments + parser.add_argument( + '--once', + action='store_true', + help='Run once and exit (for testing)' + ) + + # Configuration arguments + parser.add_argument( + '--workspace-url', + help='Databricks workspace URL (or set DATABRICKS_HOST)' + ) + parser.add_argument( + '--client-id', + help='OAuth client ID for M2M auth (or set DATABRICKS_CLIENT_ID)' + ) + parser.add_argument( + '--client-secret', + help='OAuth client secret for M2M auth (or set DATABRICKS_CLIENT_SECRET)' + ) + parser.add_argument( + '--pg-host', + help='PostgreSQL hostname (or set DATABRICKS_PG_HOST)' 
+ ) + parser.add_argument( + '--pg-port', + default='5432', + help='PostgreSQL port (default: 5432)' + ) + parser.add_argument( + '--pg-database', + default='databricks_postgres', + help='PostgreSQL database name (default: databricks_postgres)' + ) + parser.add_argument( + '--pg-username', + help='PostgreSQL username (or set DATABRICKS_PG_USERNAME)' + ) + parser.add_argument( + '--pgpass-file', + default='~/.pgpass', + help='Path to .pgpass file (default: ~/.pgpass)' + ) + parser.add_argument( + '--log-file', + default='~/.databricks_oauth_rotator.log', + help='Path to log file (default: ~/.databricks_oauth_rotator.log)' + ) + parser.add_argument( + '--interval', + type=int, + default=50, + help='Rotation interval in minutes (default: 50)' + ) + + args = parser.parse_args() + + try: + # Create rotator instance + rotator = DatabricksOAuthRotator( + workspace_url=args.workspace_url, + client_id=args.client_id, + client_secret=args.client_secret, + pg_host=args.pg_host, + pg_port=args.pg_port, + pg_database=args.pg_database, + pg_username=args.pg_username, + pgpass_file=args.pgpass_file, + log_file=args.log_file, + rotation_interval=args.interval + ) + + if args.once: + # Run once and exit + success = rotator.run_once() + sys.exit(0 if success else 1) + else: + # Run as daemon + try: + rotator.run_daemon() + except KeyboardInterrupt: + rotator.logger.info("Interrupted by user") + sys.exit(0) + + except ValueError as e: + print(f"Configuration error: {e}", file=sys.stderr) + print("\nUse --help for usage information", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/oauth_auto_token_rotation/install.py b/oauth_auto_token_rotation/install.py new file mode 100644 index 00000000..3738087e --- /dev/null +++ b/oauth_auto_token_rotation/install.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +""" +Installation helper for setting up the OAuth rotation service as a 
background daemon +""" + +import os +import sys +import shutil +import subprocess +from pathlib import Path +from typing import Optional, Dict + + +class ServiceInstaller: + """Installs and manages the OAuth rotation service""" + + def __init__(self): + self.home = Path.home() + self.service_name = "com.databricks.oauth.rotator" + + # Detect platform + self.is_macos = sys.platform == 'darwin' + self.is_linux = sys.platform.startswith('linux') + + if self.is_macos: + self.service_dir = self.home / "Library" / "LaunchAgents" + self.service_file = self.service_dir / f"{self.service_name}.plist" + elif self.is_linux: + self.service_dir = self.home / ".config" / "systemd" / "user" + self.service_file = self.service_dir / f"{self.service_name}.service" + else: + raise RuntimeError(f"Unsupported platform: {sys.platform}") + + def get_python_path(self) -> str: + """Get the current Python interpreter path""" + return sys.executable + + def generate_launchd_plist( + self, + env_vars: Optional[Dict[str, str]] = None, + extra_args: Optional[list] = None + ) -> str: + """Generate macOS LaunchAgent plist content""" + python_path = self.get_python_path() + + # Build environment variables section + env_section = "" + if env_vars: + for key, value in env_vars.items(): + env_section += f""" {key} + {value} +""" + + # Add PATH + path_dirs = [ + str(Path(python_path).parent), + "/opt/homebrew/bin", + "/usr/local/bin", + "/usr/bin", + "/bin", + "/usr/sbin", + "/sbin" + ] + env_section += f""" PATH + {':'.join(path_dirs)}""" + + # Build extra arguments + args_section = "" + if extra_args: + for arg in extra_args: + args_section += f""" {arg} +""" + + plist_content = f""" + + + + Label + {self.service_name} + + ProgramArguments + + {python_path} + -m + databricks_oauth_rotator.cli +{args_section} + + RunAtLoad + + + KeepAlive + + + StandardOutPath + {self.home}/.databricks_oauth_rotator_stdout.log + + StandardErrorPath + {self.home}/.databricks_oauth_rotator_stderr.log + + 
EnvironmentVariables + +{env_section} + + + WorkingDirectory + {self.home} + + ProcessType + Background + + Nice + 1 + + +""" + return plist_content + + def generate_systemd_service( + self, + env_vars: Optional[Dict[str, str]] = None, + extra_args: Optional[list] = None + ) -> str: + """Generate Linux systemd service content""" + python_path = self.get_python_path() + + # Build environment variables + env_section = "" + if env_vars: + for key, value in env_vars.items(): + env_section += f'Environment="{key}={value}"\n' + + # Build command with extra arguments + cmd_args = "" + if extra_args: + cmd_args = " " + " ".join(extra_args) + + service_content = f"""[Unit] +Description=Databricks OAuth Token Auto-Rotation Service +After=network.target + +[Service] +Type=simple +ExecStart={python_path} -m databricks_oauth_rotator.cli{cmd_args} +Restart=always +RestartSec=10 +{env_section} +StandardOutput=append:{self.home}/.databricks_oauth_rotator_stdout.log +StandardError=append:{self.home}/.databricks_oauth_rotator_stderr.log + +[Install] +WantedBy=default.target +""" + return service_content + + def install( + self, + env_vars: Optional[Dict[str, str]] = None, + extra_args: Optional[list] = None + ): + """Install the service""" + print(f"Installing OAuth rotation service on {sys.platform}...") + + # Create service directory + self.service_dir.mkdir(parents=True, exist_ok=True) + + # Generate service file + if self.is_macos: + content = self.generate_launchd_plist(env_vars, extra_args) + else: + content = self.generate_systemd_service(env_vars, extra_args) + + # Write service file + with open(self.service_file, 'w') as f: + f.write(content) + + print(f"āœ“ Created service file: {self.service_file}") + + # Load/enable service + self.start() + + print("\nāœ“ Service installed and started successfully!") + print(f"\nMonitor logs:") + print(f" tail -f ~/.databricks_oauth_rotator.log") + + def uninstall(self): + """Uninstall the service""" + print("Uninstalling OAuth rotation 
service...") + + # Stop service + self.stop() + + # Remove service file + if self.service_file.exists(): + self.service_file.unlink() + print(f"āœ“ Removed service file: {self.service_file}") + + print("āœ“ Service uninstalled successfully!") + + def start(self): + """Start the service""" + if self.is_macos: + subprocess.run(['launchctl', 'load', str(self.service_file)], check=False) + print("āœ“ Service started (macOS LaunchAgent)") + else: + subprocess.run(['systemctl', '--user', 'enable', f'{self.service_name}.service'], check=False) + subprocess.run(['systemctl', '--user', 'start', f'{self.service_name}.service'], check=False) + print("āœ“ Service started (systemd)") + + def stop(self): + """Stop the service""" + if self.is_macos: + subprocess.run(['launchctl', 'unload', str(self.service_file)], check=False, stderr=subprocess.DEVNULL) + print("āœ“ Service stopped (macOS LaunchAgent)") + else: + subprocess.run(['systemctl', '--user', 'stop', f'{self.service_name}.service'], check=False) + print("āœ“ Service stopped (systemd)") + + def restart(self): + """Restart the service""" + print("Restarting service...") + self.stop() + self.start() + + def status(self): + """Check service status""" + if self.is_macos: + result = subprocess.run( + ['launchctl', 'list'], + capture_output=True, + text=True + ) + if self.service_name in result.stdout: + print("āœ“ Service is running") + for line in result.stdout.split('\n'): + if self.service_name in line: + print(f" {line}") + else: + print("āœ— Service is not running") + else: + subprocess.run(['systemctl', '--user', 'status', f'{self.service_name}.service']) + + +def install_command(): + """CLI command for installing the service""" + import argparse + + parser = argparse.ArgumentParser(description='Install Databricks OAuth rotation service') + parser.add_argument('--workspace-url', help='Databricks workspace URL') + parser.add_argument('--pg-host', help='PostgreSQL hostname') + parser.add_argument('--pg-username', 
help='PostgreSQL username') + parser.add_argument('--interval', type=int, default=50, help='Rotation interval in minutes') + + args = parser.parse_args() + + # Prepare environment variables + env_vars = {} + if args.workspace_url: + env_vars['DATABRICKS_HOST'] = args.workspace_url + if args.pg_host: + env_vars['DATABRICKS_PG_HOST'] = args.pg_host + if args.pg_username: + env_vars['DATABRICKS_PG_USERNAME'] = args.pg_username + + # Prepare extra arguments + extra_args = [] + if args.interval != 50: + extra_args.extend(['--interval', str(args.interval)]) + + # Install + installer = ServiceInstaller() + installer.install(env_vars=env_vars if env_vars else None, extra_args=extra_args if extra_args else None) + + +def uninstall_command(): + """CLI command for uninstalling the service""" + installer = ServiceInstaller() + installer.uninstall() + + +def status_command(): + """CLI command for checking service status""" + installer = ServiceInstaller() + installer.status() + + +def restart_command(): + """CLI command for restarting the service""" + installer = ServiceInstaller() + installer.restart() + + +if __name__ == '__main__': + install_command() diff --git a/oauth_auto_token_rotation/requirements.txt b/oauth_auto_token_rotation/requirements.txt new file mode 100644 index 00000000..66f6ffd3 --- /dev/null +++ b/oauth_auto_token_rotation/requirements.txt @@ -0,0 +1,2 @@ +PyJWT>=2.0.0 +requests>=2.25.0 diff --git a/oauth_auto_token_rotation/rotator.py b/oauth_auto_token_rotation/rotator.py new file mode 100644 index 00000000..416628ff --- /dev/null +++ b/oauth_auto_token_rotation/rotator.py @@ -0,0 +1,405 @@ +#!/usr/bin/env python3 +""" +Databricks OAuth Token Rotator + +Core rotation logic for automatically refreshing OAuth tokens +and updating PostgreSQL .pgpass files. 
+""" + +import os +import sys +import time +import json +import subprocess +import logging +import signal +import jwt +import requests +from datetime import datetime +from pathlib import Path +from typing import Optional, Dict, Any +from logging.handlers import RotatingFileHandler + + +class DatabricksOAuthRotator: + """Manages automatic OAuth token rotation for Databricks services""" + + def __init__( + self, + workspace_url: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + pg_host: Optional[str] = None, + pg_port: str = "5432", + pg_database: str = "databricks_postgres", + pg_username: Optional[str] = None, + pgpass_file: str = "~/.pgpass", + log_file: str = "~/.databricks_oauth_rotator.log", + rotation_interval: int = 50 + ): + """ + Initialize the OAuth rotator. + + Args: + workspace_url: Databricks workspace URL (e.g., https://workspace.cloud.databricks.com) + client_id: OAuth client ID (for M2M auth) + client_secret: OAuth client secret (for M2M auth) + pg_host: PostgreSQL hostname + pg_port: PostgreSQL port (default: 5432) + pg_database: PostgreSQL database name (default: databricks_postgres) + pg_username: PostgreSQL username + pgpass_file: Path to .pgpass file (default: ~/.pgpass) + log_file: Path to log file (default: ~/.databricks_oauth_rotator.log) + rotation_interval: Rotation interval in minutes (default: 50) + """ + # Databricks configuration (with environment variable fallbacks) + self.workspace_url = workspace_url or os.getenv('DATABRICKS_HOST') + self.client_id = client_id or os.getenv('DATABRICKS_CLIENT_ID') + self.client_secret = client_secret or os.getenv('DATABRICKS_CLIENT_SECRET') + + # PostgreSQL configuration + self.pg_host = pg_host or os.getenv('DATABRICKS_PG_HOST') + self.pg_port = pg_port + self.pg_database = pg_database + self.pg_username = pg_username or os.getenv('DATABRICKS_PG_USERNAME') + + # File paths + self.pgpass_file = Path(pgpass_file).expanduser() + self.log_file = 
Path(log_file).expanduser() + + # Rotation settings + self.rotation_interval = rotation_interval * 60 # Convert to seconds + + # Validate required fields + if not self.workspace_url: + raise ValueError("workspace_url is required (or set DATABRICKS_HOST)") + if not self.pg_host: + raise ValueError("pg_host is required (or set DATABRICKS_PG_HOST)") + if not self.pg_username: + raise ValueError("pg_username is required (or set DATABRICKS_PG_USERNAME)") + + # Setup logging + self._setup_logging() + + # Signal handling + self.running = True + signal.signal(signal.SIGTERM, self._signal_handler) + signal.signal(signal.SIGINT, self._signal_handler) + + def _setup_logging(self): + """Configure logging with rotation""" + self.logger = logging.getLogger('DatabricksOAuthRotator') + self.logger.setLevel(logging.INFO) + + # Console handler + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.INFO) + console_format = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + console_handler.setFormatter(console_format) + + # File handler with rotation (10MB max, 5 backups) + file_handler = RotatingFileHandler( + self.log_file, + maxBytes=10*1024*1024, + backupCount=5 + ) + file_handler.setLevel(logging.DEBUG) + file_format = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + file_handler.setFormatter(file_format) + + self.logger.addHandler(console_handler) + self.logger.addHandler(file_handler) + + def _signal_handler(self, signum, frame): + """Handle shutdown signals gracefully""" + self.logger.info(f"Received signal {signum}. 
Shutting down gracefully...") + self.running = False + + def get_token_info(self, token: str) -> Dict[str, Any]: + """Extract information from JWT token""" + try: + decoded = jwt.decode(token, options={"verify_signature": False}) + + exp_timestamp = decoded.get('exp') + iat_timestamp = decoded.get('iat') + + info = { + 'subject': decoded.get('sub'), + 'client_id': decoded.get('client_id'), + 'scopes': decoded.get('scope', '').split(' '), + 'issuer': decoded.get('iss'), + 'audience': decoded.get('aud'), + } + + if exp_timestamp: + expiry = datetime.fromtimestamp(exp_timestamp) + info['expires_at'] = expiry.isoformat() + info['expires_in_minutes'] = int( + (expiry - datetime.now()).total_seconds() / 60 + ) + info['is_expired'] = datetime.now() >= expiry + + if iat_timestamp: + info['issued_at'] = datetime.fromtimestamp(iat_timestamp).isoformat() + + return info + + except Exception as e: + self.logger.error(f"Error decoding token: {e}") + return {'error': str(e)} + + def get_new_token_via_cli(self) -> Optional[str]: + """ + Get new OAuth token using Databricks CLI + This is the recommended method for development/personal use + """ + self.logger.info("Obtaining new token via Databricks CLI...") + + try: + # Check if databricks CLI is installed + subprocess.run( + ['databricks', '--version'], + capture_output=True, + check=True, + timeout=10 + ) + except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as e: + self.logger.error(f"Databricks CLI not available: {e}") + return None + + try: + # Get OAuth token + result = subprocess.run( + ['databricks', 'auth', 'token', '--host', self.workspace_url], + capture_output=True, + text=True, + check=False, + timeout=30 + ) + + if result.returncode != 0: + self.logger.warning("Not logged in to Databricks CLI. 
def get_new_token_via_oauth(self) -> Optional[str]:
    """
    Get a new OAuth token using the OAuth M2M client-credentials flow.

    POSTs a ``client_credentials`` grant (scope ``all-apis``) to
    ``{workspace_url}/oidc/v1/token`` using HTTP basic auth built from
    ``self.client_id`` and ``self.client_secret``.

    Returns:
        The ``access_token`` value from the JSON response, or None when the
        credentials are not configured, the request fails, or the response
        carries no usable token.
    """
    if not (self.client_id and self.client_secret):
        self.logger.debug("OAuth M2M credentials not configured")
        return None

    self.logger.info("Obtaining new token via OAuth M2M flow...")

    # Workspace-level token endpoint
    token_endpoint = f"{self.workspace_url}/oidc/v1/token"

    try:
        response = requests.post(
            token_endpoint,
            auth=(self.client_id, self.client_secret),
            data={
                'grant_type': 'client_credentials',
                'scope': 'all-apis',
            },
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: a non-JSON body may surface as a plain ValueError
        # rather than a RequestException, depending on the requests version.
        self.logger.error(f"OAuth token request failed: {e}")
        return None

    # Guard against a JSON body that is not an object (e.g. a list or string).
    access_token = payload.get('access_token') if isinstance(payload, dict) else None
    if not access_token:
        self.logger.error("No access_token in OAuth response")
        return None

    self.logger.info("Successfully obtained token via OAuth M2M")
    return access_token


def get_new_token(self) -> Optional[str]:
    """
    Get a new OAuth token using the best available method.

    Priority: OAuth M2M first, then the Databricks CLI.

    Returns:
        The first token obtained, or None if neither method produced one.
    """
    # Try OAuth M2M first (production method)
    token = self.get_new_token_via_oauth()
    if token:
        return token

    # Fallback to CLI (development method)
    self.logger.info("OAuth M2M not available, falling back to Databricks CLI")
    token = self.get_new_token_via_cli()
    if token:
        return token

    self.logger.error("Failed to obtain token via any method")
    return None


def update_pgpass_file(self, new_token: str) -> bool:
    """
    Write ``new_token`` into the .pgpass file, replacing it atomically.

    Entry format: hostname:port:database:username:password

    The entry for this exact host/port/database/username is replaced if it
    exists, otherwise a new entry is appended. Other lines are preserved.
    The new content is written to a sibling temp file that is created with
    mode 0600 *before* the token is written, then renamed over the target.

    Args:
        new_token: Token to store in the password field.

    Returns:
        True on success; False if any step failed (the error is logged and
        the temp file, if created, is removed).
    """
    def escape(field) -> str:
        # libpq requires ':' and '\' inside a field to be backslash-escaped.
        return str(field).replace('\\', '\\\\').replace(':', '\\:')

    temp_file = None
    try:
        # Key on all four lookup fields so an entry that belongs to a
        # different user of the same host/port/database is never clobbered.
        entry_key = ':'.join(
            escape(part)
            for part in (self.pg_host, self.pg_port, self.pg_database, self.pg_username)
        ) + ':'
        pgpass_entry = f"{entry_key}{escape(new_token)}\n"

        # Read existing .pgpass if it exists
        existing_lines = []
        if self.pgpass_file.exists():
            with open(self.pgpass_file, 'r') as f:
                existing_lines = f.readlines()

        # A file whose last line lacks a newline would otherwise have the
        # appended entry glued onto that line, corrupting both entries.
        if existing_lines and not existing_lines[-1].endswith('\n'):
            existing_lines[-1] += '\n'

        # Update the first matching entry, or append when there is none
        for index, line in enumerate(existing_lines):
            if line.startswith(entry_key):
                existing_lines[index] = pgpass_entry
                break
        else:
            existing_lines.append(pgpass_entry)

        # Create the temp file owner-only from the start so the token is
        # never readable by others, then tighten perms in case a stale temp
        # file with looser permissions already existed.
        temp_file = self.pgpass_file.with_name(self.pgpass_file.name + '.tmp')
        fd = os.open(temp_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            os.chmod(temp_file, 0o600)
            f.writelines(existing_lines)

        # Atomic rename
        temp_file.replace(self.pgpass_file)
        temp_file = None

        self.logger.info(f"Successfully updated {self.pgpass_file}")
        return True

    except Exception as e:
        self.logger.error(f"Error updating .pgpass file: {e}")
        if temp_file is not None:
            try:
                temp_file.unlink()
            except OSError:
                # Best-effort cleanup; the original failure is already logged.
                pass
        return False


def rotate_token(self) -> bool:
    """
    Execute one token rotation cycle.

    Obtains a new token, logs its decoded details (or a warning when
    ``get_token_info`` reports an error), and writes it to the .pgpass file.

    Returns:
        True if a token was obtained and the .pgpass file was updated,
        False otherwise.
    """
    banner = "=" * 70
    self.logger.info(banner)
    self.logger.info("Starting OAuth token rotation cycle")
    self.logger.info(banner)

    # Get new token
    new_token = self.get_new_token()
    if not new_token:
        self.logger.error("Failed to obtain new token")
        return False

    # Verify and log token info; a failed inspection is only a warning
    token_info = self.get_token_info(new_token)
    if 'error' in token_info:
        self.logger.warning(f"Could not verify token: {token_info['error']}")
    else:
        self.logger.info("New token details:")
        self.logger.info(f" - Subject: {token_info.get('subject')}")
        self.logger.info(f" - Expires at: {token_info.get('expires_at')}")
        self.logger.info(f" - Valid for: {token_info.get('expires_in_minutes')} minutes")

    # Update .pgpass file
    if not self.update_pgpass_file(new_token):
        self.logger.error("Failed to update .pgpass file")
        self.logger.error(banner)
        return False

    self.logger.info("Token rotation completed successfully")
    self.logger.info(banner)
    return True


def run_once(self) -> bool:
    """Run a single rotation cycle and return whether it succeeded."""
    self.logger.info("Running in one-shot mode")
    return self.rotate_token()


def run_daemon(self):
    """
    Run as a daemon, rotating tokens every ``self.rotation_interval`` seconds.

    Performs one rotation immediately, then loops while ``self.running`` is
    true: sleep for the interval, rotate, repeat. Returns once
    ``self.running`` becomes false.
    """
    self.logger.info("Starting OAuth token rotation daemon")
    self.logger.info(f"Rotation interval: {self.rotation_interval // 60} minutes")
    self.logger.info(f"Workspace URL: {self.workspace_url}")
    self.logger.info(f"PostgreSQL host: {self.pg_host}")
    self.logger.info(f"Log file: {self.log_file}")
    self.logger.info(f"PID: {os.getpid()}")

    # Initial rotation (failures are logged by rotate_token itself)
    self.rotate_token()

    # Continuous rotation loop
    while self.running:
        try:
            # Sleep in slices of at most 60s so a cleared self.running flag
            # is noticed promptly instead of after the whole interval.
            sleep_remaining = self.rotation_interval
            while sleep_remaining > 0 and self.running:
                sleep_time = min(60, sleep_remaining)
                time.sleep(sleep_time)
                sleep_remaining -= sleep_time

            if self.running and not self.rotate_token():
                self.logger.warning("Rotation failed, will retry on next cycle")

        except Exception as e:
            self.logger.error(f"Unexpected error in daemon loop: {e}", exc_info=True)
            # Back off briefly so a persistent error cannot spin the loop.
            time.sleep(60)

    self.logger.info("Daemon stopped")