173 lines
4.6 KiB
Python
Executable File
173 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Validate data pipeline setup for v0.2.0."""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add project root to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from src.core.enums import Timeframe # noqa: E402
|
|
from src.data.database import get_engine, init_database # noqa: E402
|
|
from src.data.loaders import CSVLoader, ParquetLoader # noqa: E402
|
|
from src.data.preprocessors import ( # noqa: E402
|
|
filter_session,
|
|
handle_missing_data,
|
|
remove_duplicates,
|
|
)
|
|
from src.data.repositories import OHLCVRepository # noqa: E402
|
|
from src.data.validators import check_continuity, detect_outliers, validate_ohlcv # noqa: E402
|
|
from src.logging import get_logger # noqa: E402
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
def validate_imports():
    """Report that the data-module imports resolved.

    The imports themselves execute at module load time at the top of this
    script, so reaching this call means none of them raised.
    """
    message = "✓ Data module imports successful"
    print(message)
|
|
|
|
|
|
def validate_database():
    """Validate database setup.

    Obtains an engine from ``get_engine()`` and then calls
    ``init_database(create_tables=True)``, printing a status line after each
    step.

    Raises:
        AssertionError: If ``get_engine()`` returns ``None``.
        Exception: Any error raised by the database helpers is reported on
            stdout and then re-raised unchanged.
    """
    try:
        engine = get_engine()
        # Explicit check instead of a bare ``assert`` so the validation is
        # not stripped when the script runs under ``python -O``.
        if engine is None:
            raise AssertionError("get_engine() returned None")
        print("✓ Database engine created")

        # Test initialization (will create tables if needed)
        init_database(create_tables=True)
        print("✓ Database initialization successful")
    except Exception as e:
        print(f"✗ Database validation failed: {e}")
        raise
|
|
|
|
|
|
def validate_loaders():
    """Check that both loader classes can be instantiated without arguments.

    Any exception raised while constructing the loaders is reported on stdout
    and then re-raised.
    """
    try:
        # Tuple elements are evaluated left to right: CSV first, then Parquet.
        loaders = (CSVLoader(), ParquetLoader())
        for loader in loaders:
            assert loader is not None
        print("✓ Data loaders initialized")
    except Exception as exc:
        print(f"✗ Loader validation failed: {exc}")
        raise
|
|
|
|
|
|
def validate_preprocessors():
    """Validate preprocessors.

    Builds a ten-row, one-minute sample frame starting at 03:00
    America/New_York and runs it through ``handle_missing_data``,
    ``remove_duplicates`` and ``filter_session`` in that order.

    Raises:
        AssertionError: If ``filter_session`` returns no rows for the sample.
    """
    import pandas as pd
    import pytz  # type: ignore[import-untyped]

    # Create sample data with EST timezone (trading session is 3-4 AM EST)
    est = pytz.timezone("America/New_York")
    timestamps = pd.date_range("2024-01-01 03:00", periods=10, freq="1min", tz=est)

    df = pd.DataFrame(
        {
            "timestamp": timestamps,
            "open": [100.0] * 10,
            "high": [100.5] * 10,
            "low": [99.5] * 10,
            "close": [100.2] * 10,
        }
    )

    # Test preprocessors
    df_processed = handle_missing_data(df)
    df_processed = remove_duplicates(df_processed)
    df_filtered = filter_session(df_processed)

    # Explicit check instead of a bare ``assert``: this is the only real
    # verification in the function and must survive ``python -O``.
    if len(df_filtered) == 0:
        raise AssertionError("filter_session() returned no rows for the sample data")
    print("✓ Preprocessors working")
|
|
|
|
|
|
def validate_validators():
    """Validate validators.

    Builds a ten-row, one-minute sample frame with naive timestamps and runs
    ``validate_ohlcv``, ``check_continuity`` and ``detect_outliers`` on it.

    Raises:
        AssertionError: If ``validate_ohlcv`` does not return exactly the ten
            sample rows.
    """
    import pandas as pd

    expected_rows = 10

    # Create valid data (timezone not required for validators)
    df = pd.DataFrame(
        {
            "timestamp": pd.date_range(
                "2024-01-01 03:00", periods=expected_rows, freq="1min"
            ),
            "open": [100.0] * expected_rows,
            "high": [100.5] * expected_rows,
            "low": [99.5] * expected_rows,
            "close": [100.2] * expected_rows,
        }
    )

    # Test validators
    df_validated = validate_ohlcv(df)
    # Results are intentionally discarded; the two-name unpacking still
    # confirms check_continuity() returns a pair.
    _is_continuous, _gaps = check_continuity(df_validated, Timeframe.M1)
    _ = detect_outliers(df_validated)  # Check it runs without error

    # Explicit check instead of a bare ``assert`` so it survives ``python -O``.
    actual_rows = len(df_validated)
    if actual_rows != expected_rows:
        raise AssertionError(
            f"validate_ohlcv() returned {actual_rows} rows, expected {expected_rows}"
        )
    print("✓ Validators working")
|
|
|
|
|
|
def validate_repositories():
    """Check that an ``OHLCVRepository`` can be built on a database session.

    Opens a session through ``get_db_session()`` and constructs the
    repository with it. Any exception is reported on stdout and re-raised.
    """
    from src.data.database import get_db_session

    try:
        with get_db_session() as db_session:
            repository = OHLCVRepository(session=db_session)
            assert repository is not None
            print("✓ Repositories working")
    except Exception as exc:
        print(f"✗ Repository validation failed: {exc}")
        raise
|
|
|
|
|
|
def validate_directories():
    """Validate directory structure.

    Each required path is checked relative to the current working directory.
    Every missing entry is reported on stdout, not only the first one.

    Returns:
        bool: ``True`` when every required directory exists, ``False``
        otherwise.
    """
    required_dirs = (
        "data/raw/ohlcv/1min",
        "data/raw/ohlcv/5min",
        "data/raw/ohlcv/15min",
        "data/processed/features",
        "data/processed/patterns",
        "data/labels/individual_patterns",
    )

    # is_dir() rather than exists(): a regular file occupying one of these
    # paths is not a usable data directory and must count as missing.
    missing = [name for name in required_dirs if not Path(name).is_dir()]

    for dir_name in missing:
        print(f"✗ Missing directory: {dir_name}")
    if missing:
        return False

    print("✓ Directory structure valid")
    return True
|
|
|
|
|
|
def main():
    """Run all validation checks.

    Returns:
        int: Process exit code, ``0`` when every check passes and ``1`` when
        any check raises or the directory check reports a failure.
    """
    import traceback

    print("Validating ICT ML Trading System v0.2.0 Data Pipeline...")
    print("-" * 60)

    try:
        validate_imports()
        validate_database()
        validate_loaders()
        validate_preprocessors()
        validate_validators()
        validate_repositories()

        # validate_directories() signals failure through its return value
        # rather than an exception, so the result must be checked here;
        # otherwise a missing directory would still be reported as success.
        if not validate_directories():
            print("✗ Validation failed: required directories are missing")
            return 1

        print("-" * 60)
        print("✓ All validations passed!")
        return 0

    except Exception as e:
        print(f"✗ Validation failed: {e}")
        traceback.print_exc()
        return 1
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|