"""Tests for data validators.""" import pandas as pd import pytest from src.core.enums import Timeframe from src.data.validators import check_continuity, detect_outliers, validate_ohlcv @pytest.fixture def valid_ohlcv_data(): """Create valid OHLCV DataFrame.""" dates = pd.date_range("2024-01-01 03:00", periods=100, freq="1min") return pd.DataFrame( { "timestamp": dates, "open": [100.0 + i * 0.1 for i in range(100)], "high": [100.5 + i * 0.1 for i in range(100)], "low": [99.5 + i * 0.1 for i in range(100)], "close": [100.2 + i * 0.1 for i in range(100)], "volume": [1000] * 100, } ) @pytest.fixture def invalid_ohlcv_data(): """Create invalid OHLCV DataFrame.""" dates = pd.date_range("2024-01-01 03:00", periods=10, freq="1min") df = pd.DataFrame( { "timestamp": dates, "open": [100.0] * 10, "high": [99.0] * 10, # Invalid: high < low "low": [99.5] * 10, "close": [100.2] * 10, } ) return df def test_validate_ohlcv_valid(valid_ohlcv_data): """Test validation with valid data.""" df = validate_ohlcv(valid_ohlcv_data) assert len(df) == 100 def test_validate_ohlcv_invalid(invalid_ohlcv_data): """Test validation with invalid data.""" with pytest.raises(Exception): # Should raise ValidationError validate_ohlcv(invalid_ohlcv_data) def test_validate_ohlcv_missing_columns(): """Test validation with missing columns.""" df = pd.DataFrame({"timestamp": pd.date_range("2024-01-01", periods=10)}) with pytest.raises(Exception): # Should raise ValidationError validate_ohlcv(df) def test_check_continuity(valid_ohlcv_data): """Test continuity check.""" is_continuous, gaps = check_continuity(valid_ohlcv_data, Timeframe.M1) assert is_continuous assert len(gaps) == 0 def test_check_continuity_with_gaps(): """Test continuity check with gaps.""" # Create data with gaps dates = pd.date_range("2024-01-01 03:00", periods=10, freq="1min") # Remove some dates to create gaps dates = dates[[0, 1, 2, 5, 6, 7, 8, 9]] # Gap between index 2 and 5 df = pd.DataFrame( { "timestamp": dates, "open": [100.0] * len(dates), "high": [100.5] * len(dates), "low": [99.5] * len(dates), "close": [100.2] * len(dates), } ) is_continuous, gaps = check_continuity(df, Timeframe.M1) assert not is_continuous assert len(gaps) > 0 def test_detect_outliers(valid_ohlcv_data): """Test outlier detection.""" # Add an outlier df = valid_ohlcv_data.copy() df.loc[50, "close"] = 200.0 # Extreme value outliers = detect_outliers(df, columns=["close"], method="iqr", threshold=3.0) assert outliers["is_outlier"].sum() > 0