feat(v0.2.0): data pipeline
This commit is contained in:
97
tests/unit/test_data/test_validators.py
Normal file
97
tests/unit/test_data/test_validators.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Tests for data validators."""
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from src.core.enums import Timeframe
|
||||
from src.data.validators import check_continuity, detect_outliers, validate_ohlcv
|
||||
|
||||
|
||||
@pytest.fixture
def valid_ohlcv_data():
    """Build a well-formed 100-row OHLCV frame of one-minute bars.

    Every price column is a linear ramp rising by 0.1 per row. The starting
    offsets keep low < open < close < high on each row, and volume is a
    constant 1000.
    """
    n_rows = 100

    def ramp(start):
        # Same arithmetic for every price column: start + i * 0.1.
        return [start + i * 0.1 for i in range(n_rows)]

    columns = {
        "timestamp": pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min"),
        "open": ramp(100.0),
        "high": ramp(100.5),
        "low": ramp(99.5),
        "close": ramp(100.2),
        "volume": [1000] * n_rows,
    }
    return pd.DataFrame(columns)
|
||||
|
||||
|
||||
@pytest.fixture
def invalid_ohlcv_data():
    """Build a 10-row OHLCV frame whose prices are mutually inconsistent.

    On every row ``high`` (99.0) is below ``low`` (99.5), and also below
    ``open`` and ``close``. A ``volume`` column is included, as in the valid
    fixture, so the price inconsistency is the only thing wrong with the
    frame and a rejection cannot be caused by a missing column instead.
    """
    n_rows = 10
    return pd.DataFrame(
        {
            "timestamp": pd.date_range("2024-01-01 03:00", periods=n_rows, freq="1min"),
            "open": [100.0] * n_rows,
            "high": [99.0] * n_rows,  # Invalid: high < low
            "low": [99.5] * n_rows,
            "close": [100.2] * n_rows,
            "volume": [1000] * n_rows,
        }
    )
|
||||
|
||||
|
||||
def test_validate_ohlcv_valid(valid_ohlcv_data):
    """A well-formed frame passes validation with all 100 rows returned."""
    validated = validate_ohlcv(valid_ohlcv_data)
    row_count = len(validated)
    assert row_count == 100
|
||||
|
||||
|
||||
def test_validate_ohlcv_invalid(invalid_ohlcv_data):
    """Validation must raise when given the invalid fixture frame."""
    # Deliberately broad for now; the intended exception is ValidationError.
    expect_failure = pytest.raises(Exception)  # Should raise ValidationError
    with expect_failure:
        validate_ohlcv(invalid_ohlcv_data)
|
||||
|
||||
|
||||
def test_validate_ohlcv_missing_columns():
    """Validation must raise for a frame that only has a timestamp column."""
    timestamps = pd.date_range("2024-01-01", periods=10)
    timestamp_only = pd.DataFrame({"timestamp": timestamps})

    # Deliberately broad for now; the intended exception is ValidationError.
    expect_failure = pytest.raises(Exception)  # Should raise ValidationError
    with expect_failure:
        validate_ohlcv(timestamp_only)
|
||||
|
||||
|
||||
def test_check_continuity(valid_ohlcv_data):
    """Evenly spaced one-minute bars are reported as continuous, with no gaps."""
    result = check_continuity(valid_ohlcv_data, Timeframe.M1)
    continuous, found_gaps = result

    assert continuous
    assert len(found_gaps) == 0
|
||||
|
||||
|
||||
def test_check_continuity_with_gaps():
    """Bars with missing minutes are reported as non-continuous with gaps."""
    full_range = pd.date_range("2024-01-01 03:00", periods=10, freq="1min")

    # Skip positions 3 and 4 so there is a hole between index 2 and index 5.
    kept_positions = [0, 1, 2, 5, 6, 7, 8, 9]
    dates = full_range[kept_positions]
    n_rows = len(dates)

    bars = pd.DataFrame(
        {
            "timestamp": dates,
            "open": [100.0] * n_rows,
            "high": [100.5] * n_rows,
            "low": [99.5] * n_rows,
            "close": [100.2] * n_rows,
        }
    )

    continuous, found_gaps = check_continuity(bars, Timeframe.M1)
    assert not continuous
    assert len(found_gaps) > 0
|
||||
|
||||
|
||||
def test_detect_outliers(valid_ohlcv_data):
    """An extreme close value is flagged by IQR outlier detection."""
    # Work on a copy so the injected spike does not leak into the fixture.
    spiked = valid_ohlcv_data.copy()
    spiked.at[50, "close"] = 200.0  # Extreme value

    flagged = detect_outliers(
        spiked,
        columns=["close"],
        method="iqr",
        threshold=3.0,
    )
    outlier_count = flagged["is_outlier"].sum()
    assert outlier_count > 0
|
||||
Reference in New Issue
Block a user