feat(v0.2.0): complete data pipeline with loaders, database, and validation

This commit is contained in:
0x_n3m0_
2026-01-05 11:54:04 +02:00
parent b5e7043df6
commit 0079127ade
7 changed files with 792 additions and 124 deletions

View File

@@ -41,23 +41,27 @@ def test_init_database(temp_db):
def test_get_db_session(temp_db):
    """Test database session context manager.

    Calls ``init_database(create_tables=True)``, opens a session through
    ``get_db_session()``, and checks that the session can execute a trivial
    query.

    Args:
        temp_db: Fixture requested by this test; it is not referenced in the
            body (presumably it provisions a temporary database -- confirm
            against the fixture definition).
    """
    from sqlalchemy import text

    init_database(create_tables=True)

    with get_db_session() as session:
        assert session is not None
        # Session should be usable. The statement is wrapped in text() because
        # SQLAlchemy 2.x rejects plain SQL strings passed to Session.execute();
        # only the text() form of the query is kept.
        result = session.execute(text("SELECT 1")).scalar()
        assert result == 1
def test_session_rollback_on_error(temp_db):
"""Test that session rolls back on error."""
from sqlalchemy import text
init_database(create_tables=True)
try:
with get_db_session() as session:
# Cause an error
session.execute("SELECT * FROM nonexistent_table")
session.execute(text("SELECT * FROM nonexistent_table"))
except Exception:
pass # Expected

View File

@@ -67,8 +67,14 @@ def test_remove_duplicates(sample_data_with_duplicates):
def test_filter_session():
"""Test session filtering."""
# Create data spanning multiple hours
dates = pd.date_range("2024-01-01 02:00", periods=120, freq="1min")
import pytz # type: ignore[import-untyped]
# Create data spanning multiple hours explicitly in EST
# Start at 2 AM EST and cover 2 hours of 1-minute rows (02:00-03:59, 120 rows)
est = pytz.timezone("America/New_York")
start_time = est.localize(pd.Timestamp("2024-01-01 02:00:00"))
dates = pd.date_range(start=start_time, periods=120, freq="1min")
df = pd.DataFrame(
{
"timestamp": dates,
@@ -79,9 +85,11 @@ def test_filter_session():
}
)
# Filter to 3-4 AM EST
df_filtered = filter_session(df, session_start="03:00", session_end="04:00")
# Filter to 3-4 AM EST - should get rows from minute 60-119 (60 rows)
df_filtered = filter_session(
df, session_start="03:00", session_end="04:00", timezone="America/New_York"
)
# Should have approximately 60 rows (1 hour of 1-minute data)
assert len(df_filtered) > 0
assert len(df_filtered) <= 60
assert len(df_filtered) > 0, f"Expected filtered data but got {len(df_filtered)} rows"
assert len(df_filtered) <= 61 # Inclusive endpoints