diff --git a/README.md b/README.md index 8a0336c..2e50555 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,29 @@ nearby_stops = Stop.objects.filter( ) ``` +## 🧩 GTFS Schedule — Admin & Query Patterns (no composite PKs) + +### 🎯 Context +En esta versión del módulo `gtfs.schedule`, por consejo de Fabian Abarca, se decidió **no usar composite primary keys (CPKs)** debido a las limitaciones de Django Admin y las complejidades al definir ForeignKeys y relaciones. Cada entidad mantiene una clave primaria simple (`id` o `_id`). + +--- + +### 🧱 Model Hierarchy +```text +FeedInfoSchedule + ├── AgencySchedule + │ └── RouteSchedule + │ └── TripSchedule + │ ├── StopTimeSchedule + │ └── ShapeSchedule + └── CalendarSchedule + └── CalendarDateSchedule +``` + + + + + ### Processing GTFS Realtime Data ```python diff --git a/gtfs/__init__.py b/gtfs/__init__.py index c7bab38..7506c21 100644 --- a/gtfs/__init__.py +++ b/gtfs/__init__.py @@ -1,10 +1,32 @@ -__all__ = [ - "__version__", -] +# gtfs/__init__.py -__version__ = "0.1.0" +__all__ = ["__version__", "test_editable_install"] +__version__ = "0.1.0" def test_editable_install(): """Simple function to test if editable installation is working.""" return "Version 2 - Changes are now reflected immediately!" 
+ + +# ---------------------------------------------------------- +# Carga diferida de modelos para evitar errores de registro +# ---------------------------------------------------------- +import importlib + +def autodiscover_models(): + """Carga los modelos GTFS (Schedule) solo cuando Django ya inicializΓ³ las apps.""" + try: + importlib.import_module("gtfs.models_schedule") + except ModuleNotFoundError: + # Si aΓΊn no existe el archivo o no estΓ‘ listo, no romper el paquete + pass + + +# ---------------------------------------------------------- +# Exponer los nombres de los modelos Schedule +# ---------------------------------------------------------- +__all__.extend([ + "Agency", "Route", "Trip", "Stop", "StopTime", + "Calendar", "CalendarDate", "Shape", "FeedInfo", +]) diff --git a/gtfs/admin.py b/gtfs/admin.py index cca42d3..763bd2c 100644 --- a/gtfs/admin.py +++ b/gtfs/admin.py @@ -52,3 +52,28 @@ class StopAdmin(admin.GISModelAdmin): admin.site.register(TripUpdate) admin.site.register(StopTimeUpdate) admin.site.register(VehiclePosition, admin.GISModelAdmin) + + + +#GTFS Schedule implementation +from .models_schedule import ( + FeedInfoSchedule, + AgencySchedule, + RouteSchedule, + CalendarSchedule, + TripSchedule, + StopSchedule, + StopTimeSchedule, + ShapeSchedule, + CalendarDateSchedule, +) + +admin.site.register(FeedInfoSchedule) +admin.site.register(AgencySchedule) +admin.site.register(RouteSchedule) +admin.site.register(CalendarSchedule) +admin.site.register(TripSchedule) +admin.site.register(StopSchedule) +admin.site.register(StopTimeSchedule) +admin.site.register(ShapeSchedule) +admin.site.register(CalendarDateSchedule) diff --git a/gtfs/fixtures/create_fixture.py b/gtfs/fixtures/create_fixture.py new file mode 100644 index 0000000..23c5464 --- /dev/null +++ b/gtfs/fixtures/create_fixture.py @@ -0,0 +1,166 @@ +""" +Generate minimal deterministic fixtures for GTFS Schedule +--------------------------------------------------------- +Usage: + python 
gtfs/fixtures/create_fixture.py +or with options: + python gtfs/fixtures/create_fixture.py --seed 42 --output fixtures/schedule_fixture.json +""" + +import os +import sys +import json +import random +import django +from datetime import date, time +from pathlib import Path + +# ───────────────────────────── +# Django setup +# ───────────────────────────── +BASE_DIR = Path(__file__).resolve().parents[2] +sys.path.append(str(BASE_DIR)) +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tests.settings") +django.setup() + +from gtfs.models import ( + FeedInfoSchedule, + AgencySchedule, + RouteSchedule, + CalendarSchedule, + TripSchedule, + StopSchedule, + StopTimeSchedule, + ShapeSchedule, + CalendarDateSchedule, +) + +# ───────────────────────────── +# Fixture generator +# ───────────────────────────── +def create_fixtures(seed: int = 42, output_path: str = "gtfs/fixtures/schedule_fixture.json"): + """Generate minimal deterministic GTFS Schedule dataset.""" + random.seed(seed) + print(f"🎯 Generating GTFS Schedule fixtures (seed={seed})") + + # Clean previous data + FeedInfoSchedule.objects.all().delete() + + # Feed + feed = FeedInfoSchedule.objects.create( + feed_publisher_name="UCR Feed Demo", + feed_publisher_url="https://ucr.ac.cr", + feed_lang="es", + feed_version="v1.0" + ) + + # Agency + agency = AgencySchedule.objects.create( + feed=feed, + agency_id="UCR", + agency_name="Universidad de Costa Rica", + agency_url="https://ucr.ac.cr", + agency_timezone="America/Costa_Rica" + ) + + # Route + route = RouteSchedule.objects.create( + feed=feed, + route_id="R1", + agency=agency, + route_short_name="1", + route_long_name="Campus a San Pedro", + route_type=3 + ) + + # Calendar + calendar = CalendarSchedule.objects.create( + feed=feed, + service_id="S2025", + monday=1, tuesday=1, wednesday=1, thursday=1, friday=1, + saturday=0, sunday=0, + start_date=date(2025, 3, 1), + end_date=date(2025, 12, 31) + ) + + # Shape + shape = ShapeSchedule.objects.create( + feed=feed, + 
shape_id="Shape1", + shape_pt_lat=9.936, + shape_pt_lon=-84.054, + shape_pt_sequence=1 + ) + + # Stop + stop = StopSchedule.objects.create( + feed=feed, + stop_id="SP01", + stop_name="Parada San Pedro", + stop_lat=9.936, + stop_lon=-84.054 + ) + + # Trip + trip = TripSchedule.objects.create( + feed=feed, + trip_id="T100", + route=route, + service=calendar, + trip_headsign="San Pedro", + shape=shape + ) + + # StopTime + stoptime = StopTimeSchedule.objects.create( + feed=feed, + trip=trip, + stop=stop, + arrival_time=time(7, 30), + departure_time=time(7, 31), + stop_sequence=1 + ) + + # CalendarDate + caldate = CalendarDateSchedule.objects.create( + feed=feed, + service=calendar, + date=date(2025, 4, 1), + exception_type=1 + ) + + # Output summary + data = { + "feed": feed.feed_publisher_name, + "agency": agency.agency_name, + "route": route.route_long_name, + "trip": trip.trip_headsign, + "stop": stop.stop_name, + "calendar_date": str(caldate.date) + } + + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=4, ensure_ascii=False) + + print(f"βœ… Fixture successfully written β†’ {output_path}") + + +# ───────────────────────────── +# CLI entrypoint +# ───────────────────────────── +if __name__ == "__main__": + seed = 42 + output = "gtfs/fixtures/schedule_fixture.json" + + if "--seed" in sys.argv: + idx = sys.argv.index("--seed") + 1 + if idx < len(sys.argv): + seed = int(sys.argv[idx]) + + if "--output" in sys.argv: + idx = sys.argv.index("--output") + 1 + if idx < len(sys.argv): + output = sys.argv[idx] + + create_fixtures(seed=seed, output_path=output) diff --git a/gtfs/fixtures/schedule.json b/gtfs/fixtures/schedule.json new file mode 100644 index 0000000..14ede1c --- /dev/null +++ b/gtfs/fixtures/schedule.json @@ -0,0 +1,201 @@ +{ + "entities": { + "agency": { + "primary_key": "agency_id", + "fields": { + "agency_id": "string", + "agency_name": "string", + "agency_url": 
"string", + "agency_timezone": "string", + "agency_lang": "string", + "agency_phone": "string", + "agency_email": "string" + }, + "constraints": { + "agency_timezone": "IANA timezone string", + "agency_lang": "ISO 639-1 code", + "agency_url": "http/https URL" + } + }, + "routes": { + "primary_key": "route_id", + "fields": { + "route_id": "string", + "agency_id": "string", + "route_short_name": "string", + "route_long_name": "string", + "route_desc": "string", + "route_type": "integer", + "route_color": "string", + "route_text_color": "string" + }, + "foreign_keys": { + "agency_id": "agency.agency_id" + }, + "constraints": { + "route_type": "enum {0-7, 11, ...}", + "route_color": "hex RGB", + "route_text_color": "hex RGB" + } + }, + "trips": { + "primary_key": "trip_id", + "fields": { + "trip_id": "string", + "route_id": "string", + "service_id": "string", + "trip_headsign": "string", + "trip_short_name": "string", + "direction_id": "integer", + "block_id": "string", + "shape_id": "string", + "wheelchair_accessible": "integer" + }, + "foreign_keys": { + "route_id": "routes.route_id", + "service_id": "calendar.service_id", + "shape_id": "shapes.shape_id" + }, + "constraints": { + "direction_id": "enum {0,1}", + "wheelchair_accessible": "enum {0=unknown,1=accessible,2=not accessible}" + } + }, + "stops": { + "primary_key": "stop_id", + "fields": { + "stop_id": "string", + "stop_code": "string", + "stop_name": "string", + "stop_desc": "string", + "stop_lat": "float", + "stop_lon": "float", + "zone_id": "string", + "location_type": "integer", + "parent_station": "string", + "stop_timezone": "string", + "wheelchair_boarding": "integer" + }, + "foreign_keys": { + "parent_station": "stops.stop_id" + }, + "constraints": { + "stop_lat": "-90 ≀ value ≀ 90", + "stop_lon": "-180 ≀ value ≀ 180", + "location_type": "enum {0=Stop,1=Station,2=Entrance,...}", + "wheelchair_boarding": "enum {0=unknown,1=accessible,2=not accessible}" + } + }, + "stop_times": { + "primary_key": [ + 
"trip_id", + "stop_sequence" + ], + "fields": { + "trip_id": "string", + "arrival_time": "time", + "departure_time": "time", + "stop_id": "string", + "stop_sequence": "integer", + "stop_headsign": "string", + "pickup_type": "integer", + "drop_off_type": "integer", + "shape_dist_traveled": "float", + "timepoint": "integer" + }, + "foreign_keys": { + "trip_id": "trips.trip_id", + "stop_id": "stops.stop_id" + }, + "constraints": { + "arrival_time": "HH:MM:SS (can exceed 24h)", + "departure_time": "HH:MM:SS (can exceed 24h)", + "pickup_type": "enum {0,1,2,3}", + "drop_off_type": "enum {0,1,2,3}" + } + }, + "calendar": { + "primary_key": "service_id", + "fields": { + "service_id": "string", + "monday": "integer", + "tuesday": "integer", + "wednesday": "integer", + "thursday": "integer", + "friday": "integer", + "saturday": "integer", + "sunday": "integer", + "start_date": "date", + "end_date": "date" + }, + "constraints": { + "monday": "enum {0=no service,1=service runs}", + "tuesday": "enum {0=no service,1=service runs}", + "wednesday": "enum {0=no service,1=service runs}", + "thursday": "enum {0=no service,1=service runs}", + "friday": "enum {0=no service,1=service runs}", + "saturday": "enum {0=no service,1=service runs}", + "sunday": "enum {0=no service,1=service runs}", + "start_date": "YYYYMMDD", + "end_date": "YYYYMMDD (β‰₯ start_date)" + } + }, + "calendar_dates": { + "primary_key": [ + "service_id", + "date" + ], + "fields": { + "service_id": "string", + "date": "date", + "exception_type": "integer" + }, + "foreign_keys": { + "service_id": "calendar.service_id" + }, + "constraints": { + "date": "YYYYMMDD", + "exception_type": "enum {1=service added,2=service removed}" + } + }, + "shapes": { + "primary_key": [ + "shape_id", + "shape_pt_sequence" + ], + "fields": { + "shape_id": "string", + "shape_pt_lat": "float", + "shape_pt_lon": "float", + "shape_pt_sequence": "integer", + "shape_dist_traveled": "float" + }, + "constraints": { + "shape_pt_lat": "-90 ≀ value ≀ 
90", + "shape_pt_lon": "-180 ≀ value ≀ 180", + "shape_pt_sequence": "integer β‰₯ 0", + "shape_dist_traveled": "float β‰₯ 0" + } + }, + "feed_info": { + "fields": { + "feed_publisher_name": "string", + "feed_publisher_url": "string", + "feed_lang": "string", + "feed_version": "string", + "feed_start_date": "date", + "feed_end_date": "date", + "feed_contact_email": "string", + "feed_contact_url": "string" + }, + "constraints": { + "feed_lang": "ISO 639-1 code", + "feed_publisher_url": "http/https URL", + "feed_contact_email": "valid email", + "feed_contact_url": "http/https URL" + } + } + }, + "version": "1.0.0", + "spec": "GTFS Schedule v2.0" +} \ No newline at end of file diff --git a/gtfs/fixtures/schedule_fixture.json b/gtfs/fixtures/schedule_fixture.json new file mode 100644 index 0000000..3737104 --- /dev/null +++ b/gtfs/fixtures/schedule_fixture.json @@ -0,0 +1,8 @@ +{ + "feed": "UCR Feed Demo", + "agency": "Universidad de Costa Rica", + "route": "Campus a San Pedro", + "trip": "San Pedro", + "stop": "Parada San Pedro", + "calendar_date": "2025-04-01" +} \ No newline at end of file diff --git a/gtfs/models.py b/gtfs/models.py index 5391ecc..9d693f3 100644 --- a/gtfs/models.py +++ b/gtfs/models.py @@ -1,4 +1,9 @@ # Add your models here + + + + + # from django.contrib.gis.db import models # Use GeoDjango models when needed import re @@ -85,12 +90,10 @@ class Feed(models.Model): def __str__(self): return self.feed_id - class Agency(models.Model): """One or more transit agencies that provide the data in this feed. Maps to agency.txt in the GTFS feed. 
""" - id = models.BigAutoField(primary_key=True) feed = models.ForeignKey(Feed, to_field="feed_id", on_delete=models.CASCADE) agency_id = models.CharField( @@ -1086,3 +1089,10 @@ class Alert(models.Model): def __str__(self): return self.alert_id + + + +# ───────────────────────────── +# Added for subissue #4 - Migrate to composite PK (Django 5.2) +# ───────────────────────────── +from .models_schedule import * diff --git a/gtfs/models_schedule.py b/gtfs/models_schedule.py new file mode 100644 index 0000000..0d7138c --- /dev/null +++ b/gtfs/models_schedule.py @@ -0,0 +1,209 @@ +from django.db import models +from django.core.validators import MinValueValidator, MaxValueValidator, RegexValidator + +# ───────────────────────────── +# 1. FeedInfoSchedule +# ───────────────────────────── +class FeedInfoSchedule(models.Model): + feed_publisher_name = models.CharField(max_length=200) + feed_publisher_url = models.URLField() + feed_lang = models.CharField(max_length=10) + feed_version = models.CharField(max_length=50, null=True, blank=True) + feed_start_date = models.DateField(null=True, blank=True) + feed_end_date = models.DateField(null=True, blank=True) + feed_contact_email = models.EmailField(null=True, blank=True) + feed_contact_url = models.URLField(null=True, blank=True) + + def __str__(self): + return f"FeedInfoSchedule {self.feed_publisher_name}" + + +# ───────────────────────────── +# 2. 
# AgencySchedule
# ─────────────────────────────
# One validator instance for both colour fields, so they reject the same
# inputs and report the same message.
_HEX_COLOR_VALIDATOR = RegexValidator(
    r"^[0-9A-Fa-f]{6}$",
    "Debe ser un color HEX válido.",
)


class AgencySchedule(models.Model):
    """A transit agency row (agency.txt), scoped to one feed.

    ``agency_id`` is only unique within its feed; the database primary key is
    the implicit auto ``id``.
    """

    feed = models.ForeignKey(FeedInfoSchedule, on_delete=models.CASCADE, related_name="agencies")
    agency_id = models.CharField(max_length=50)
    agency_name = models.CharField(max_length=200)
    agency_url = models.URLField()
    agency_timezone = models.CharField(max_length=50)
    # Optional in the schema fixture (ISO 639-1 code); nullable so existing
    # callers that never pass it keep working.
    agency_lang = models.CharField(max_length=10, null=True, blank=True)
    agency_phone = models.CharField(max_length=20, null=True, blank=True)
    agency_email = models.EmailField(null=True, blank=True)

    class Meta:
        unique_together = ("feed", "agency_id")

    def __str__(self):
        return self.agency_name


# ─────────────────────────────
# 3. RouteSchedule
# ─────────────────────────────
class RouteSchedule(models.Model):
    """A route row (routes.txt), scoped to one feed and owned by one agency.

    Deleting either the feed or the agency cascades to the route.
    """

    feed = models.ForeignKey(FeedInfoSchedule, on_delete=models.CASCADE, related_name="routes")
    route_id = models.CharField(max_length=50)
    agency = models.ForeignKey(AgencySchedule, on_delete=models.CASCADE, related_name="routes")
    route_short_name = models.CharField(max_length=50)
    route_long_name = models.CharField(max_length=255)
    route_desc = models.TextField(null=True, blank=True)
    # Accepts every integer in 0..12 inclusive.
    route_type = models.IntegerField(validators=[MinValueValidator(0), MaxValueValidator(12)])
    # Six hex digits, no leading "#".
    route_color = models.CharField(
        max_length=6,
        validators=[_HEX_COLOR_VALIDATOR],
        null=True,
        blank=True,
    )
    route_text_color = models.CharField(
        max_length=6,
        validators=[_HEX_COLOR_VALIDATOR],
        null=True,
        blank=True,
    )

    class Meta:
        unique_together = ("feed", "route_id")

    def __str__(self):
        return f"{self.route_short_name} - {self.route_long_name}"


# ─────────────────────────────
# 4.
# CalendarSchedule
# ─────────────────────────────
# Every weekday flag shares the same two-value enum.
_SERVICE_DAY_CHOICES = [(0, "No service"), (1, "Service runs")]


class CalendarSchedule(models.Model):
    """A weekly service pattern (calendar.txt), scoped to one feed.

    Each weekday flag is 0 or 1; ``start_date``/``end_date`` bound the period
    in which the pattern applies.
    """

    feed = models.ForeignKey(FeedInfoSchedule, on_delete=models.CASCADE, related_name="calendars")
    service_id = models.CharField(max_length=50)
    monday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    tuesday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    wednesday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    thursday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    friday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    saturday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    sunday = models.IntegerField(choices=_SERVICE_DAY_CHOICES)
    start_date = models.DateField()
    end_date = models.DateField()

    class Meta:
        unique_together = ("feed", "service_id")

    def clean(self):
        """Reject a service window whose end precedes its start.

        Runs as part of ``full_clean()`` (admin and model forms); plain
        ``save()``/``objects.create()`` calls do not invoke it.

        Raises:
            ValidationError: keyed on ``end_date`` when it is earlier than
                ``start_date``.
        """
        # Imported here so the block does not depend on a new file-level import.
        from django.core.exceptions import ValidationError

        super().clean()
        # Either date may still be None when field-level validation failed.
        if self.start_date and self.end_date and self.end_date < self.start_date:
            raise ValidationError(
                {"end_date": "end_date no puede ser anterior a start_date."}
            )

    def __str__(self):
        return self.service_id


# ─────────────────────────────
# 5. CalendarDateSchedule (no composite PK)
# ─────────────────────────────
class CalendarDateSchedule(models.Model):
    """A single-date service exception (calendar_dates.txt).

    Uniqueness is enforced on ``(service, date)``; ``feed`` is stored as well
    but is not part of that constraint.
    """

    feed = models.ForeignKey('FeedInfoSchedule', on_delete=models.CASCADE, related_name="calendar_dates")
    service = models.ForeignKey('CalendarSchedule', on_delete=models.CASCADE, related_name="calendar_dates")
    date = models.DateField()
    exception_type = models.IntegerField(choices=[(1, "Service added"), (2, "Service removed")])

    class Meta:
        unique_together = ("service", "date")

    def __str__(self):
        # Follows the FK, so this can trigger a query when `service` is not cached.
        return f"{self.service.service_id} - {self.date}"


# ─────────────────────────────
# 6.
# StopSchedule
# ─────────────────────────────
# Enumerations and coordinate bounds used by StopSchedule, named once here.
_LOCATION_TYPE_CHOICES = [
    (0, "Stop"),
    (1, "Station"),
    (2, "Entrance/Exit"),
    (3, "Generic Node"),
    (4, "Boarding Area"),
]
_WHEELCHAIR_BOARDING_CHOICES = [
    (0, "No info"),
    (1, "Accessible"),
    (2, "Not accessible"),
]


class StopSchedule(models.Model):
    """A stop, station or other location row (stops.txt), scoped to one feed.

    ``parent_station`` points at another StopSchedule row and is cleared
    (set to NULL) when that row is deleted.
    """

    feed = models.ForeignKey(
        FeedInfoSchedule,
        on_delete=models.CASCADE,
        related_name="stops",
    )
    stop_id = models.CharField(max_length=50)
    stop_code = models.CharField(max_length=50, null=True, blank=True)
    stop_name = models.CharField(max_length=200)
    stop_desc = models.TextField(null=True, blank=True)
    # Latitude is limited to [-90, 90], longitude to [-180, 180].
    stop_lat = models.FloatField(
        validators=[MinValueValidator(-90), MaxValueValidator(90)],
    )
    stop_lon = models.FloatField(
        validators=[MinValueValidator(-180), MaxValueValidator(180)],
    )
    zone_id = models.CharField(max_length=50, null=True, blank=True)
    location_type = models.IntegerField(choices=_LOCATION_TYPE_CHOICES, default=0)
    parent_station = models.ForeignKey(
        'self',
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
    stop_timezone = models.CharField(max_length=50, null=True, blank=True)
    wheelchair_boarding = models.IntegerField(
        choices=_WHEELCHAIR_BOARDING_CHOICES,
        default=0,
    )

    class Meta:
        unique_together = ("feed", "stop_id")

    def __str__(self):
        return self.stop_name


# ─────────────────────────────
# 7.
# ShapeSchedule (no composite PK)
# ─────────────────────────────
class ShapeSchedule(models.Model):
    """One point of a shape polyline (shapes.txt), scoped to one feed.

    Each row is a single ``(shape_id, shape_pt_sequence)`` point; a full shape
    is the set of rows sharing a ``shape_id`` within the same feed.
    """

    feed = models.ForeignKey('FeedInfoSchedule', on_delete=models.CASCADE, related_name="shapes")
    shape_id = models.CharField(max_length=50)
    shape_pt_sequence = models.PositiveIntegerField()
    shape_pt_lat = models.FloatField(validators=[MinValueValidator(-90), MaxValueValidator(90)])
    shape_pt_lon = models.FloatField(validators=[MinValueValidator(-180), MaxValueValidator(180)])
    shape_dist_traveled = models.FloatField(validators=[MinValueValidator(0)], null=True, blank=True)

    class Meta:
        # `feed` is part of the key, as on the other feed-scoped models here:
        # without it two feeds could not both contain e.g. shape "Shape1",
        # point 1.
        unique_together = ("feed", "shape_id", "shape_pt_sequence")

    def __str__(self):
        return f"{self.shape_id} ({self.shape_pt_sequence})"


# ─────────────────────────────
# 8. TripSchedule
# ─────────────────────────────
class TripSchedule(models.Model):
    """A trip row (trips.txt), scoped to one feed.

    Deleting the feed, route or service calendar cascades to the trip;
    deleting the referenced shape row only clears ``shape``.
    """

    feed = models.ForeignKey(FeedInfoSchedule, on_delete=models.CASCADE, related_name="trips")
    trip_id = models.CharField(max_length=50)
    route = models.ForeignKey(RouteSchedule, on_delete=models.CASCADE, related_name="trips")
    service = models.ForeignKey(CalendarSchedule, on_delete=models.CASCADE, related_name="trips")
    trip_headsign = models.CharField(max_length=255, null=True, blank=True)
    trip_short_name = models.CharField(max_length=50, null=True, blank=True)
    direction_id = models.IntegerField(choices=[(0, "Outbound"), (1, "Inbound")], null=True, blank=True)
    block_id = models.CharField(max_length=50, null=True, blank=True)
    # NOTE(review): this FK targets one ShapeSchedule row, i.e. a single shape
    # point rather than the whole polyline sharing a shape_id - confirm that
    # is the intended relationship.
    shape = models.ForeignKey(ShapeSchedule, on_delete=models.SET_NULL, null=True, blank=True)
    wheelchair_accessible = models.IntegerField(
        choices=[(0, "No info"), (1, "Accessible"), (2, "Not accessible")],
        default=0
    )

    class Meta:
        unique_together = ("feed", "trip_id")

    def __str__(self):
        return f"{self.trip_id} - {self.trip_headsign or self.trip_short_name or ''}"


# ─────────────────────────────
# 9.
StopTimeSchedule (sin CPK) +# ───────────────────────────── +class StopTimeSchedule(models.Model): + feed = models.ForeignKey('FeedInfoSchedule', on_delete=models.CASCADE, related_name="stop_times") + trip = models.ForeignKey('TripSchedule', on_delete=models.CASCADE, related_name="stop_times") + stop = models.ForeignKey('StopSchedule', on_delete=models.CASCADE, related_name="stop_times") + arrival_time = models.TimeField() + departure_time = models.TimeField() + stop_sequence = models.IntegerField() + stop_headsign = models.CharField(max_length=255, null=True, blank=True) + pickup_type = models.IntegerField( + choices=[(0, "Regular"), (1, "No pickup"), (2, "Phone agency"), (3, "Coordinate with driver")], + default=0 + ) + drop_off_type = models.IntegerField( + choices=[(0, "Regular"), (1, "No drop off"), (2, "Phone agency"), (3, "Coordinate with driver")], + default=0 + ) + shape_dist_traveled = models.FloatField(null=True, blank=True) + timepoint = models.IntegerField(choices=[(0, "Approximate"), (1, "Exact")], default=1) + + class Meta: + unique_together = ('trip', 'stop_sequence') + + def __str__(self): + return f"{self.trip.trip_id} - seq {self.stop_sequence}" diff --git a/gtfs/utils/schedule.py b/gtfs/utils/schedule.py index 4b2aabf..b8550b2 100644 --- a/gtfs/utils/schedule.py +++ b/gtfs/utils/schedule.py @@ -1,13 +1,355 @@ + +# gtfs/schedule.py + +from __future__ import annotations + +import csv +import io +import zipfile +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, Any, List, Optional, Tuple, Union + +from django.db import transaction + +# Importa tus modelos reales (ajusta el import segΓΊn tu estructura) +try: + from gtfs.models import ( + Agency, Route, Trip, Stop, StopTime, Calendar, CalendarDate, Shape, FeedInfo + ) +except Exception: # pragma: no cover - permite que el archivo importe aunque aΓΊn no existan los modelos + Agency = Route = Trip = Stop = StopTime = Calendar = CalendarDate = 
# Import the real models; if that fails for any reason, bind every model name
# to ``object`` so this module can still be imported.
try:
    from gtfs.models import (
        Agency, Route, Trip, Stop, StopTime, Calendar, CalendarDate, Shape, FeedInfo
    )
except Exception:  # pragma: no cover - lets the file import even before the models exist
    Agency = Route = Trip = Stop = StopTime = Calendar = CalendarDate = Shape = FeedInfo = object  # type: ignore

logger = logging.getLogger(__name__)


# ----------------------------
# Result dataclasses
# ----------------------------

@dataclass
class ImportResult:
    """Summary of one import run."""

    zip_path: Path
    # table name -> number of rows inserted
    inserted: Dict[str, int] = field(default_factory=dict)
    # table name -> number of rows updated (only meaningful with an upsert)
    updated: Dict[str, int] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)


@dataclass
class ExportResult:
    """Summary of one export run."""

    output_zip: Path
    files_written: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)


@dataclass
class ValidationIssue:
    """One finding produced while validating a feed."""

    level: str  # "ERROR" | "WARNING"
    file: str
    rownum: Optional[int]
    field: Optional[str]
    message: str


@dataclass
class ValidationReport:
    """All findings for one validated source, in the order they were recorded."""

    source: str
    from_zip: bool
    issues: List[ValidationIssue] = field(default_factory=list)

    def _with_level(self, level: str) -> List[ValidationIssue]:
        """Return a new list with the issues whose level equals *level*."""
        return list(filter(lambda issue: issue.level == level, self.issues))

    @property
    def errors(self) -> List[ValidationIssue]:
        """Issues recorded with level ``"ERROR"``."""
        return self._with_level("ERROR")

    @property
    def warnings(self) -> List[ValidationIssue]:
        """Issues recorded with level ``"WARNING"``."""
        return self._with_level("WARNING")


# ----------------------------
# Constants and utilities
# ----------------------------

# Recommended processing order (respects FK dependencies).
GTFS_LOAD_ORDER = [
    "agency",
    "routes",
    "stops",
    "calendar",
    "calendar_dates",
    "shapes",
    "trips",
    "stop_times",
    "feed_info",
]

# GTFS table name -> Django model class.
MODEL_MAP = {
    "agency": Agency,
    "routes": Route,
    "stops": Stop,
    "calendar": Calendar,
    "calendar_dates": CalendarDate,
    "shapes": Shape,
    "trips": Trip,
    "stop_times": StopTime,
    "feed_info": FeedInfo,
}

REQUIRED_FILES = {
    "agency",
    "routes",
    "trips",
    "stops",
    "stop_times",
    "calendar",
}
REQUIRED_FILES = {"agency", "routes", "trips", "stops", "stop_times", "calendar"}

CSV_DIALECT = {
    "delimiter": ",",
    "quotechar": '"',
    "lineterminator": "\n",
}

_TXT_SUFFIX = ".txt"


def _open_zip(zip_path: Union[str, Path]) -> zipfile.ZipFile:
    """Open *zip_path* as a read-only ZIP archive.

    Raises:
        FileNotFoundError: if nothing exists at *zip_path*.
        zipfile.BadZipFile: propagated from ``zipfile`` when the file is not
            a ZIP archive.
    """
    zpath = Path(zip_path)
    if not zpath.exists():
        raise FileNotFoundError(f"No existe el archivo: {zpath}")
    return zipfile.ZipFile(zpath, "r")


def _table_names(zf: zipfile.ZipFile) -> set:
    """Return the archive's ``.txt`` member names with the suffix removed."""
    # Slice the suffix off instead of str.replace(), which would also strip a
    # ".txt" occurring in the middle of a member name.
    return {
        member[: -len(_TXT_SUFFIX)]
        for member in zf.namelist()
        if member.endswith(_TXT_SUFFIX)
    }


def _read_csv_from_zip(zf: zipfile.ZipFile, name: str) -> Tuple[List[str], List[Dict[str, str]]]:
    """Read ``<name>.txt`` from the archive and return ``(headers, rows)``.

    Values are left as strings; type conversion is the caller's job. A member
    that is absent yields ``([], [])`` so the caller decides whether that is
    an error.
    """
    try:
        raw = zf.open(f"{name}{_TXT_SUFFIX}", "r")
    except KeyError:  # ZipFile.open raises KeyError for an unknown member
        return [], []

    # "utf-8-sig" drops a leading BOM when present (plain "utf-8" would leave
    # "\ufeff" glued to the first header); newline="" is what the csv module
    # requires so quoted fields containing line breaks parse correctly.
    with io.TextIOWrapper(raw, encoding="utf-8-sig", newline="") as text:
        reader = csv.DictReader(text)
        headers = list(reader.fieldnames or [])
        rows = list(reader)
    return headers, rows


def _write_csv_to_zip(zf: zipfile.ZipFile, name: str, headers: List[str], rows: List[Dict[str, Any]]) -> None:
    """Write *rows* as ``<name>.txt`` into the archive using *headers* as columns.

    Raises:
        ValueError: from ``csv.DictWriter`` when a row has a key not in *headers*.
    """
    data = io.StringIO()
    writer = csv.DictWriter(data, fieldnames=headers, **CSV_DIALECT)
    writer.writeheader()
    writer.writerows(rows)
    zf.writestr(f"{name}{_TXT_SUFFIX}", data.getvalue())


# GTFS Schedule functions


def import_gtfs_schedule(
    zip_path: Union[str, Path],
    *,
    schema: Optional[Dict[str, Any]] = None,
    strict: bool = True,
    replace: bool = False,
) -> "ImportResult":
    """Import a GTFS Schedule feed (ZIP -> Django database).

    Still a skeleton: the files are opened and parsed, but no rows are
    persisted yet (see the TODOs).

    Args:
        zip_path: Path to the ``.zip`` holding the GTFS ``.txt`` files.
        schema: Parsed machine-readable schema; currently unused (TODO).
        strict: When True, missing required files abort the import.
        replace: When True, existing rows are meant to be cleared before
            loading; currently a no-op (TODO).

    Returns:
        An ImportResult. Any exception raised while importing is logged and
        reported through ``errors`` instead of propagating.
    """
    result = ImportResult(zip_path=Path(zip_path))
    try:
        with _open_zip(zip_path) as zf:
            available = _table_names(zf)  # computed once, reused below
            missing = REQUIRED_FILES - available
            if missing:
                msg = f"Faltan archivos requeridos: {sorted(missing)}"
                result.errors.append(msg)
                if strict:
                    return result
                logger.warning(msg)
            # TODO: schema validation (when schema is not None)

            with transaction.atomic():
                if replace:
                    # TODO: delete existing data, respecting FK order. Kept
                    # inside the transaction so a failed import rolls the
                    # deletion back as well.
                    pass

                for table in GTFS_LOAD_ORDER:
                    if table not in available:
                        continue  # may be an optional file
                    _headers, _rows = _read_csv_from_zip(zf, table)
                    # TODO: map rows -> instances of MODEL_MAP[table]
                    # TODO: convert types (int/float/date/time) per schema
                    # TODO: bulk_create and update result.inserted[table]

    except Exception as exc:  # top-level boundary: report through the result
        logger.exception("Error importando GTFS")
        result.errors.append(str(exc))
    return result


def export_gtfs_schedule(
    output_zip_path: Union[str, Path],
    *,
    include_optional: bool = True,
    schema: Optional[Dict[str, Any]] = None,
) -> "ExportResult":
    """Export the Django database to a GTFS Schedule feed (database -> ZIP).

    Still a skeleton: the archive is created, but no table is written yet
    (see the TODOs).

    Args:
        output_zip_path: Destination of the ``.zip``; missing parent
            directories are created.
        include_optional: Meant to control whether optional tables are
            included; currently unused (TODO).
        schema: Meant to drive column order; currently unused (TODO).

    Returns:
        An ExportResult. Any exception is logged and reported through
        ``errors`` instead of propagating.
    """
    output_path = Path(output_zip_path)
    result = ExportResult(output_zip=output_path)

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            for table in GTFS_LOAD_ORDER:
                model = MODEL_MAP.get(table)
                if model is None or model is object:
                    continue  # model not available (import fallback placeholder)

                # TODO: if include_optional is False, skip optional tables without rows
                # TODO: query = model.objects.all()
                # TODO: build headers and rows from schema or model metadata
                #   headers = [...]
                #   rows = [ {...}, {...} ]
                #   if not rows: continue
                #   _write_csv_to_zip(zf, table, headers, rows)
                #   result.files_written.append(f"{table}.txt")

    except Exception as exc:
        logger.exception("Error exportando GTFS")
        result.errors.append(str(exc))

    return result


def validate_gtfs_schedule(
    source: Union[str, Path],
    *,
    from_zip: bool = True,
    schema: Optional[Dict[str, Any]] = None,
    strict_types: bool = True,
) -> "ValidationReport":
    """Validate a GTFS Schedule feed (from a ZIP or from the database).

    Implemented so far: when ``from_zip`` is True, every name in
    REQUIRED_FILES must be present in the archive; each missing file is
    reported as an ERROR issue. Everything else is still TODO.

    Args:
        source: Path to the ``.zip`` when ``from_zip`` is True; otherwise an
            identifier for reading from the database (not used yet).
        from_zip: True to validate an archive, False to validate database rows.
        schema: Parsed machine-readable schema; currently unused (TODO).
        strict_types: Meant to choose ERROR vs WARNING for bad types;
            currently unused (TODO).

    Returns:
        A ValidationReport. Any exception is logged and recorded as a general
        ERROR issue instead of propagating.
    """
    report = ValidationReport(source=str(source), from_zip=from_zip)

    # TODO: remaining minimal validations:
    #  - required fields present per file
    #  - types: int/float/date/time (time may exceed 24h)
    #  - ranges: lat/lon, enums (route_type, pickup/drop_off, etc.)
    #  - basic referential integrity (FKs)
    #  - PKs/unique (including composite ones)

    try:
        if from_zip:
            with _open_zip(source) as zf:
                missing = REQUIRED_FILES - _table_names(zf)
            for table in sorted(missing):
                report.issues.append(ValidationIssue(
                    level="ERROR", file=f"{table}{_TXT_SUFFIX}", rownum=None, field=None,
                    message="Falta archivo requerido",
                ))
        else:
            # TODO: validate by querying the models directly
            pass

    except Exception as exc:
        logger.exception("Error validando GTFS")
        report.issues.append(ValidationIssue(
            level="ERROR", file="(general)", rownum=None, field=None, message=str(exc)
        ))

    return report


# Import GTFS models!
# Legacy stub, superseded by import_gtfs_schedule() above. Its docstring is
# kept as a comment so no unreachable string statement trails `return report`.
# def import_gtfs_schedule(zip_path: str) -> dict:
#     Import a GTFS (ZIP) file into the Django database.
+ :param zip_path: ruta al archivo GTFS .zip + :return: dict con estadísticas (registros insertados, errores, warnings) + """ -def export_gtfs_schedule(): - return "Exported GTFS Schedule" +#def export_gtfs_schedule(): + #return "Exported GTFS Schedule" -def validate_gtfs_schedule(): - return "Validated GTFS Schedule" +#def validate_gtfs_schedule(): + #return "Validated GTFS Schedule" diff --git a/manage.py b/manage.py new file mode 100644 index 0000000..4a29feb --- /dev/null +++ b/manage.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +import os +import sys + +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tests.settings") + + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) + + diff --git a/tests/settings.py b/tests/settings.py index da15686..3ce3199 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,6 +1,11 @@ +# tests/settings.py + import os from pathlib import Path +# -------------------------------------------- +# BASE CONFIGURATION +# -------------------------------------------- BASE_DIR = Path(__file__).resolve().parent SECRET_KEY = "test-secret-key" @@ -10,6 +15,9 @@ USE_TZ = True TIME_ZONE = "UTC" +# -------------------------------------------- +# INSTALLED APPS +# -------------------------------------------- INSTALLED_APPS = [ "django.contrib.admin", "django.contrib.auth", @@ -17,11 +25,13 @@ "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", - # Enable GeoDjango if requested (default off to avoid system deps for unit tests) - *(["django.contrib.gis"] if os.getenv("USE_GIS", "0") == "1" else []), - "gtfs", + "django.contrib.gis", # GeoDjango enabled + "gtfs", # GTFS Schedule app ] +# -------------------------------------------- +# MIDDLEWARE +# -------------------------------------------- MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -31,6 +41,9 @@
"django.contrib.messages.middleware.MessageMiddleware", ] +# -------------------------------------------- +# URLS / TEMPLATES / DEFAULTS +# -------------------------------------------- ROOT_URLCONF = "tests.urls" TEMPLATES = [ @@ -52,26 +65,32 @@ STATIC_URL = "/static/" DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" -# Database: default to SQLite for unit tests; allow GIS backends via env -if os.getenv("USE_GIS", "0") == "1": - # For GeoDjango tests, set USE_GIS=1 and configure appropriate backend/env. - DATABASES = { - "default": { - "ENGINE": os.getenv( - "DJANGO_DB_ENGINE", - "django.contrib.gis.db.backends.postgis", - ), - "NAME": os.getenv("POSTGRES_DB", "gtfs_test"), - "USER": os.getenv("POSTGRES_USER", "postgres"), - "PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"), - "HOST": os.getenv("POSTGRES_HOST", "localhost"), - "PORT": int(os.getenv("POSTGRES_PORT", "5432")), - } +# -------------------------------------------- +# DATABASE: GeoDjango +# -------------------------------------------- +DATABASES = { + "default": { + "ENGINE": "django.contrib.gis.db.backends.postgis", + "NAME": "gtfs_test", # nombre de la BD que se crea + "USER": "geovanny", # usuario de la BD + "PASSWORD": "postgres", # contraseña (se puede cambiar) + "HOST": "localhost", + "PORT": "5432", } -else: - DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": os.path.join(BASE_DIR, "db.sqlite3"), - } - } \ No newline at end of file +} + + +SPATIALITE_LIBRARY_PATH = "mod_spatialite" + +# 💡 Evita triggers ISO con 'rowid' +SPATIAL_REF_SYS_TABLE = "spatial_ref_sys" +SPATIALITE_INIT_COMMANDS = [ + "SELECT InitSpatialMetaData(1);" # Safe initialization +] + + +# -------------------------------------------- +# ENVIRONMENT VARIABLES FOR GDAL +# -------------------------------------------- +os.environ["GDAL_LIBRARY_PATH"] = "/usr/lib/libgdal.so.30" + diff --git a/tests/test_schedule_crud.py b/tests/test_schedule_crud.py new file mode 100644 index
0000000..6d3e708 --- /dev/null +++ b/tests/test_schedule_crud.py @@ -0,0 +1,139 @@ +# gtfs/tests/test_schedule_crud.py +from django.test import TestCase +from gtfs.models import ( + FeedInfoSchedule, + AgencySchedule, + RouteSchedule, + CalendarSchedule, + TripSchedule, + StopSchedule, + StopTimeSchedule, + ShapeSchedule, + CalendarDateSchedule +) + +class ScheduleCRUDTests(TestCase): + """Integration test for GTFS Schedule models (no composite PKs).""" + + def setUp(self): + # Crear feed base + self.feed = FeedInfoSchedule.objects.create( + feed_publisher_name="UCR Feed", + feed_publisher_url="https://ucr.ac.cr", + feed_lang="es", + feed_version="v1.0" + ) + + # Crear agencia + self.agency = AgencySchedule.objects.create( + feed=self.feed, + agency_id="UCR", + agency_name="Universidad de Costa Rica", + agency_url="https://ucr.ac.cr", + agency_timezone="America/Costa_Rica" + ) + + # Crear ruta + self.route = RouteSchedule.objects.create( + feed=self.feed, + route_id="R1", + agency=self.agency, + route_short_name="1", + route_long_name="Campus a San Pedro", + route_type=3 + ) + + # Crear calendario + self.calendar = CalendarSchedule.objects.create( + feed=self.feed, + service_id="S2025", + monday=1, tuesday=1, wednesday=1, thursday=1, friday=1, + saturday=0, sunday=0, + start_date="2025-03-01", end_date="2025-12-31" + ) + + # Crear shape + self.shape = ShapeSchedule.objects.create( + feed=self.feed, + shape_id="Shape1", + shape_pt_lat=9.936, + shape_pt_lon=-84.054, + shape_pt_sequence=1 + ) + + # Crear parada + self.stop = StopSchedule.objects.create( + feed=self.feed, + stop_id="SP01", + stop_name="Parada San Pedro", + stop_lat=9.936, + stop_lon=-84.054 + ) + + # Crear viaje + self.trip = TripSchedule.objects.create( + feed=self.feed, + trip_id="T100", + route=self.route, + service=self.calendar, + trip_headsign="San Pedro", + shape=self.shape + ) + + # Crear stop_time + self.stoptime = StopTimeSchedule.objects.create( + feed=self.feed, + trip=self.trip, + 
+ stop=self.stop, + arrival_time="07:30:00", + departure_time="07:31:00", + stop_sequence=1 + ) + + # Crear calendar_date + self.cal_date = CalendarDateSchedule.objects.create( + feed=self.feed, + service=self.calendar, + date="2025-04-01", + exception_type=1 + ) + + def test_crud_integrity(self): + """Verifica que las relaciones básicas funcionen correctamente.""" + # Feed -> Agency + self.assertEqual(self.feed.agencies.count(), 1) + self.assertEqual(self.feed.agencies.first().agency_id, "UCR") + + # Agency -> Route + self.assertEqual(self.agency.routes.count(), 1) + self.assertEqual(self.agency.routes.first().route_id, "R1") + + # Route -> Trip + self.assertEqual(self.route.trips.count(), 1) + self.assertEqual(self.route.trips.first().trip_id, "T100") + + # Trip -> StopTimes + self.assertEqual(self.trip.stop_times.count(), 1) + st = self.trip.stop_times.first() + self.assertEqual(st.stop_sequence, 1) + self.assertEqual(st.stop.stop_name, "Parada San Pedro") + + # Calendar -> CalendarDates + self.assertEqual(self.calendar.calendar_dates.count(), 1) + self.assertEqual(self.calendar.calendar_dates.first().date.strftime("%Y-%m-%d"), "2025-04-01") + + # Query reversas + self.assertEqual(self.stop.stop_times.count(), 1) + self.assertEqual(self.feed.routes.count(), 1) + self.assertEqual(self.feed.trips.count(), 1) + + def test_update_delete(self): + """Prueba actualizaciones y eliminaciones básicas.""" + # Update + self.route.route_long_name = "Campus a Montes de Oca" + self.route.save() + self.assertEqual(RouteSchedule.objects.get(route_id="R1").route_long_name, "Campus a Montes de Oca") + + # Delete + self.trip.delete() + self.assertEqual(self.route.trips.count(), 0)