Files
dax-ml/tests/unit/test_data/test_validators.py
2026-01-05 11:34:18 +02:00

98 lines
2.9 KiB
Python

"""Tests for data validators."""
import pandas as pd
import pytest
from src.core.enums import Timeframe
from src.data.validators import check_continuity, detect_outliers, validate_ohlcv
@pytest.fixture
def valid_ohlcv_data():
    """Return a well-formed 100-row, one-minute OHLCV DataFrame.

    Every price series climbs by 0.1 per bar, and each row satisfies
    low < open < close < high. Volume is a constant 1000.
    """
    n_rows = 100
    # Per-bar upward drift shared by all four price columns.
    drift = [i * 0.1 for i in range(n_rows)]
    timestamps = pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min")

    columns = {
        "timestamp": timestamps,
        "open": [100.0 + step for step in drift],
        "high": [100.5 + step for step in drift],
        "low": [99.5 + step for step in drift],
        "close": [100.2 + step for step in drift],
        "volume": [1000] * n_rows,
    }
    return pd.DataFrame(columns)
@pytest.fixture
def invalid_ohlcv_data():
    """Return a 10-row OHLCV DataFrame whose only defect is high < low.

    All six OHLCV columns are present (including ``volume``) so that a
    validator rejecting this frame does so because of the inconsistent
    price relationship, not because a column is missing. The
    missing-column case is covered by its own test.
    """
    n_rows = 10
    timestamps = pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min")
    return pd.DataFrame(
        {
            "timestamp": timestamps,
            "open": [100.0] * n_rows,
            "high": [99.0] * n_rows,  # Invalid: high < low (and below open/close)
            "low": [99.5] * n_rows,
            "close": [100.2] * n_rows,
            # Same constant volume as the valid fixture; presumably a required
            # column for validate_ohlcv -- confirm against the validator schema.
            "volume": [1000] * n_rows,
        }
    )
def test_validate_ohlcv_valid(valid_ohlcv_data):
    """A well-formed frame passes validation with all 100 rows retained."""
    validated = validate_ohlcv(valid_ohlcv_data)
    row_count = len(validated)
    assert row_count == 100
def test_validate_ohlcv_invalid(invalid_ohlcv_data):
    """Validation must raise when given the inconsistent fixture frame."""
    # NOTE(review): Exception is a catch-all; the intent is the project's
    # ValidationError -- narrow this once its import path is confirmed.
    expect_rejection = pytest.raises(Exception)
    with expect_rejection:
        validate_ohlcv(invalid_ohlcv_data)
def test_validate_ohlcv_missing_columns():
    """Validation must raise for a frame that has only a timestamp column."""
    timestamps = pd.date_range("2024-01-01", periods=10)
    timestamp_only = pd.DataFrame({"timestamp": timestamps})

    # NOTE(review): Exception is a catch-all; the intent is the project's
    # ValidationError -- narrow this once its import path is confirmed.
    expect_rejection = pytest.raises(Exception)
    with expect_rejection:
        validate_ohlcv(timestamp_only)
def test_check_continuity(valid_ohlcv_data):
    """Gap-free one-minute data is reported as continuous with no gaps."""
    outcome = check_continuity(valid_ohlcv_data, Timeframe.M1)
    continuous, found_gaps = outcome

    assert continuous
    # len() rather than truthiness: the gaps container type is not fixed here.
    assert len(found_gaps) == 0
def test_check_continuity_with_gaps():
    """One-minute data with two missing bars is reported as discontinuous."""
    full_range = pd.date_range("2024-01-01 03:00", periods=10, freq="1min")

    # Drop positions 3 and 4 so there is a hole between bars 2 and 5.
    dropped = (3, 4)
    kept_positions = [pos for pos in range(10) if pos not in dropped]
    sparse_times = full_range[kept_positions]
    n_rows = len(sparse_times)

    frame = pd.DataFrame(
        {
            "timestamp": sparse_times,
            "open": [100.0] * n_rows,
            "high": [100.5] * n_rows,
            "low": [99.5] * n_rows,
            "close": [100.2] * n_rows,
        }
    )

    continuous, found_gaps = check_continuity(frame, Timeframe.M1)

    assert not continuous
    assert len(found_gaps) > 0
def test_detect_outliers(valid_ohlcv_data):
    """An extreme close value injected into clean data gets flagged (IQR)."""
    # Work on a copy so the spike is confined to this test's own frame.
    spiked = valid_ohlcv_data.copy()
    spiked.loc[50, "close"] = 200.0  # far above the ~100-110 price range

    result = detect_outliers(
        spiked,
        columns=["close"],
        method="iqr",
        threshold=3.0,
    )

    flagged_count = result["is_outlier"].sum()
    assert flagged_count > 0