feat(v0.2.0): data pipeline

This commit is contained in:
0x_n3m0_
2026-01-05 11:34:18 +02:00
parent 2527938680
commit b5e7043df6
23 changed files with 2813 additions and 7 deletions

183
scripts/download_data.py Executable file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
"""Download DAX OHLCV data from external sources."""
import argparse
import sys
from pathlib import Path
# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from src.core.enums import Timeframe # noqa: E402
from src.logging import get_logger # noqa: E402
logger = get_logger(__name__)
def download_from_csv(
    input_file: str,
    symbol: str,
    timeframe: Timeframe,
    output_dir: Path,
) -> None:
    """
    Load a CSV file through CSVLoader and re-export it as CSV and Parquet.

    Both outputs are written into ``output_dir`` and are named
    ``<symbol>_<timeframe.value>`` with a ``.csv`` / ``.parquet`` suffix.

    Args:
        input_file: Path to input CSV file
        symbol: Trading symbol
        timeframe: Timeframe enum
        output_dir: Output directory (created if it does not exist)
    """
    # Imported lazily, as in the original, so the loader module is only
    # pulled in when a CSV conversion is actually requested.
    from src.data.loaders import CSVLoader

    frame = CSVLoader().load(input_file, symbol=symbol, timeframe=timeframe)

    # Make sure the destination exists before any file is written.
    output_dir.mkdir(parents=True, exist_ok=True)

    stem = f"{symbol}_{timeframe.value}"
    row_count = len(frame)

    # CSV first, then Parquet (kept alongside for faster loading); each
    # write is followed by its own log line.
    writers = (
        (".csv", frame.to_csv),
        (".parquet", frame.to_parquet),
    )
    for suffix, write in writers:
        destination = output_dir / f"{stem}{suffix}"
        write(destination, index=False)
        logger.info(f"Saved {row_count} rows to {destination}")
def download_from_api(
    symbol: str,
    timeframe: Timeframe,
    start_date: str,
    end_date: str,
    output_dir: Path,
    api_provider: str = "manual",
) -> None:
    """
    Placeholder for a future API download; currently only logs.

    No data is fetched and nothing is written: the function emits a warning
    that the feature is missing, then an info line describing the request.
    ``output_dir`` and ``api_provider`` are accepted but not used yet.

    Args:
        symbol: Trading symbol
        timeframe: Timeframe enum
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        output_dir: Output directory
        api_provider: API provider name
    """
    not_implemented_msg = (
        "API download not yet implemented. "
        "Please provide CSV file using --input-file option."
    )
    logger.warning(not_implemented_msg)

    # Built after the warning so the warning is emitted even if the
    # timeframe object turns out to be unusable.
    request_summary = (
        f"Would download {symbol} {timeframe.value} data "
        f"from {start_date} to {end_date}"
    )
    logger.info(request_summary)
def main(argv: "list[str] | None" = None) -> int:
    """
    Main entry point.

    Parses the command line, validates it, and dispatches to the CSV
    conversion or the API download routine.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        0 on success, 1 if the download step raised an exception.

    Raises:
        SystemExit: Raised by argparse (exit status 2) for invalid or
            incomplete arguments, including ``--api`` without both
            ``--start`` and ``--end``.
    """
    parser = argparse.ArgumentParser(
        description="Download DAX OHLCV data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Download from CSV file
python scripts/download_data.py --input-file data.csv \\
--symbol DAX --timeframe 1min \\
--output data/raw/ohlcv/1min/
# Download from API (when implemented)
python scripts/download_data.py --symbol DAX --timeframe 5min \\
--start 2024-01-01 --end 2024-01-31 \\
--output data/raw/ohlcv/5min/
""",
    )

    # Input options: exactly one data source must be chosen.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        "--input-file",
        type=str,
        help="Path to input CSV file",
    )
    input_group.add_argument(
        "--api",
        action="store_true",
        help="Download from API (not yet implemented)",
    )

    # Common arguments
    parser.add_argument(
        "--symbol",
        type=str,
        default="DAX",
        help="Trading symbol (default: DAX)",
    )
    parser.add_argument(
        "--timeframe",
        type=str,
        choices=["1min", "5min", "15min"],
        required=True,
        help="Timeframe",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Output directory",
    )

    # Optional arguments for API download
    parser.add_argument(
        "--start",
        type=str,
        help="Start date (YYYY-MM-DD) for API download",
    )
    parser.add_argument(
        "--end",
        type=str,
        help="End date (YYYY-MM-DD) for API download",
    )

    args = parser.parse_args(argv)

    # Reject an incomplete API request before touching the filesystem, so a
    # usage error never leaves a freshly created output directory behind.
    # parser.error() exits with status 2 via SystemExit.
    if args.api and not (args.start and args.end):
        parser.error("--start and --end are required for API download")

    try:
        # Convert timeframe string to enum; argparse `choices` guarantees
        # the key is present.
        timeframe_map = {
            "1min": Timeframe.M1,
            "5min": Timeframe.M5,
            "15min": Timeframe.M15,
        }
        timeframe = timeframe_map[args.timeframe]

        # Create output directory
        output_dir = Path(args.output)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Compare against None rather than truthiness: an explicitly passed
        # empty path must reach the loader and fail there, not be skipped
        # and reported as a successful download.
        if args.input_file is not None:
            logger.info(f"Downloading from CSV: {args.input_file}")
            download_from_csv(args.input_file, args.symbol, timeframe, output_dir)
        elif args.api:
            # NOTE(review): download_from_api is currently a placeholder that
            # only logs, so the success message below is emitted on this path
            # even though no data is fetched.
            download_from_api(
                args.symbol,
                timeframe,
                args.start,
                args.end,
                output_dir,
            )

        logger.info("Data download completed successfully")
        return 0
    except Exception as e:
        # Top-level boundary: log with traceback and map to a failing exit code.
        logger.error(f"Data download failed: {e}", exc_info=True)
        return 1
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())