98 lines
2.9 KiB
Python
98 lines
2.9 KiB
Python
"""Tests for data validators."""
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from src.core.enums import Timeframe
|
|
from src.data.validators import check_continuity, detect_outliers, validate_ohlcv
|
|
|
|
|
|
@pytest.fixture
def valid_ohlcv_data():
    """Build a 100-row, one-minute OHLCV frame with consistent prices.

    Every row satisfies low < open < close < high: the four price columns
    share the same upward drift of 0.1 per row and differ only by a fixed
    offset. Volume is a constant 1000.
    """
    n_rows = 100
    timestamps = pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min")
    # One drift series reused by all price columns keeps them in lockstep.
    drift = [row * 0.1 for row in range(n_rows)]
    columns = {
        "timestamp": timestamps,
        "open": [100.0 + step for step in drift],
        "high": [100.5 + step for step in drift],
        "low": [99.5 + step for step in drift],
        "close": [100.2 + step for step in drift],
        "volume": [1000] * n_rows,
    }
    return pd.DataFrame(columns)
|
|
|
|
|
|
@pytest.fixture
def invalid_ohlcv_data():
    """Build a 10-row, one-minute OHLCV frame with inconsistent prices.

    Every row has ``high`` (99.0) below ``low`` (99.5), and both ``open``
    (100.0) and ``close`` (100.2) lie above ``high``.

    The ``volume`` column is included so the frame carries the same columns
    as ``valid_ohlcv_data``. Without it, a validator that insists on volume
    could reject the frame for a missing column rather than for the price
    violation this fixture is meant to exercise; the missing-column case
    has its own dedicated test.
    """
    n_rows = 10
    timestamps = pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min")
    return pd.DataFrame(
        {
            "timestamp": timestamps,
            "open": [100.0] * n_rows,
            "high": [99.0] * n_rows,  # Invalid: high < low
            "low": [99.5] * n_rows,
            "close": [100.2] * n_rows,
            # Same schema as the valid fixture: prices are the only defect.
            "volume": [1000] * n_rows,
        }
    )
|
|
|
|
|
|
def test_validate_ohlcv_valid(valid_ohlcv_data):
    """Validating well-formed data yields a result with all 100 rows."""
    validated = validate_ohlcv(valid_ohlcv_data)
    row_count = len(validated)
    assert row_count == 100
|
|
|
|
|
|
def test_validate_ohlcv_invalid(invalid_ohlcv_data):
    """Validating price-inconsistent data must raise."""
    # Should raise ValidationError; the assertion accepts any Exception
    # subclass, so it does not pin the exact error type.
    expected_error = Exception
    with pytest.raises(expected_error):
        validate_ohlcv(invalid_ohlcv_data)
|
|
|
|
|
|
def test_validate_ohlcv_missing_columns():
    """Validating a frame that has only a timestamp column must raise."""
    timestamps = pd.date_range("2024-01-01", periods=10)
    timestamps_only = pd.DataFrame({"timestamp": timestamps})
    # Should raise ValidationError; the assertion accepts any Exception
    # subclass, so it does not pin the exact error type.
    expected_error = Exception
    with pytest.raises(expected_error):
        validate_ohlcv(timestamps_only)
|
|
|
|
|
|
def test_check_continuity(valid_ohlcv_data):
    """Evenly spaced one-minute data is reported continuous with no gaps."""
    continuous, found_gaps = check_continuity(valid_ohlcv_data, Timeframe.M1)
    gap_count = len(found_gaps)
    assert continuous
    assert gap_count == 0
|
|
|
|
|
|
def test_check_continuity_with_gaps():
    """One-minute data with two bars removed is reported as discontinuous."""
    # Start from ten consecutive one-minute stamps...
    full_range = pd.date_range("2024-01-01 03:00", periods=10, freq="1min")
    # ...then keep every position except 3 and 4, leaving a hole after 2.
    kept_positions = [0, 1, 2, 5, 6, 7, 8, 9]
    timestamps = full_range[kept_positions]
    n_rows = len(timestamps)

    columns = {
        "timestamp": timestamps,
        "open": [100.0] * n_rows,
        "high": [100.5] * n_rows,
        "low": [99.5] * n_rows,
        "close": [100.2] * n_rows,
    }
    gapped = pd.DataFrame(columns)

    continuous, found_gaps = check_continuity(gapped, Timeframe.M1)
    assert not continuous
    assert len(found_gaps) > 0
|
|
|
|
|
|
def test_detect_outliers(valid_ohlcv_data):
    """A single extreme close value is flagged by IQR outlier detection."""
    # Work on a copy so the injected spike stays local to this frame.
    spiked = valid_ohlcv_data.copy()
    spiked.loc[50, "close"] = 200.0  # Extreme value

    flagged = detect_outliers(
        spiked,
        columns=["close"],
        method="iqr",
        threshold=3.0,
    )
    flagged_count = flagged["is_outlier"].sum()
    assert flagged_count > 0
|