#!/usr/bin/env python3
"""Download DAX OHLCV data from external sources."""

import argparse
import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from src.core.enums import Timeframe  # noqa: E402
from src.logging import get_logger  # noqa: E402

logger = get_logger(__name__)

# CLI timeframe name -> Timeframe member. This is the single source of truth
# for both the argparse ``choices`` list and the string-to-enum conversion,
# so the two cannot drift apart.
_TIMEFRAME_BY_NAME = {
    "1min": Timeframe.M1,
    "5min": Timeframe.M5,
    "15min": Timeframe.M15,
}


def download_from_csv(
    input_file: str,
    symbol: str,
    timeframe: Timeframe,
    output_dir: Path,
) -> None:
    """
    Copy/convert CSV file to standard format.

    Loads ``input_file`` through ``CSVLoader`` and writes the result twice
    into ``output_dir``: as ``<symbol>_<timeframe.value>.csv`` and as
    ``<symbol>_<timeframe.value>.parquet``. The directory is created if it
    does not exist.

    Args:
        input_file: Path to input CSV file
        symbol: Trading symbol
        timeframe: Timeframe enum
        output_dir: Output directory
    """
    # Imported lazily so the loader is only pulled in when the CSV path
    # is actually taken.
    from src.data.loaders import CSVLoader

    loader = CSVLoader()
    # NOTE(review): the result is used via len(), to_csv() and to_parquet(),
    # so it is presumably a pandas DataFrame - confirm against CSVLoader.
    df = loader.load(input_file, symbol=symbol, timeframe=timeframe)

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save as CSV
    output_file = output_dir / f"{symbol}_{timeframe.value}.csv"
    df.to_csv(output_file, index=False)
    logger.info(f"Saved {len(df)} rows to {output_file}")

    # Also save as Parquet for faster loading
    output_parquet = output_dir / f"{symbol}_{timeframe.value}.parquet"
    df.to_parquet(output_parquet, index=False)
    logger.info(f"Saved {len(df)} rows to {output_parquet}")


def download_from_api(
    symbol: str,
    timeframe: Timeframe,
    start_date: str,
    end_date: str,
    output_dir: Path,
    api_provider: str = "manual",
) -> None:
    """
    Download data from API (placeholder for future implementation).

    Nothing is downloaded or written: the function only logs a warning and
    the request it would have made. ``output_dir`` and ``api_provider`` are
    accepted for the future implementation and are currently unused.

    Args:
        symbol: Trading symbol
        timeframe: Timeframe enum
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        output_dir: Output directory
        api_provider: API provider name
    """
    logger.warning(
        "API download not yet implemented. "
        "Please provide CSV file using --input-file option."
    )
    logger.info(
        f"Would download {symbol} {timeframe.value} data "
        f"from {start_date} to {end_date}"
    )


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Download DAX OHLCV data",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The API example includes --api: the input group below is required,
        # so the command is rejected without it.
        epilog="""
Examples:
  # Download from CSV file
  python scripts/download_data.py --input-file data.csv \\
      --symbol DAX --timeframe 1min \\
      --output data/raw/ohlcv/1min/

  # Download from API (when implemented)
  python scripts/download_data.py --api --symbol DAX --timeframe 5min \\
      --start 2024-01-01 --end 2024-01-31 \\
      --output data/raw/ohlcv/5min/
        """,
    )

    # Input options: exactly one data source must be selected.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        "--input-file",
        type=str,
        help="Path to input CSV file",
    )
    input_group.add_argument(
        "--api",
        action="store_true",
        help="Download from API (not yet implemented)",
    )

    # Required arguments
    parser.add_argument(
        "--symbol",
        type=str,
        default="DAX",
        help="Trading symbol (default: DAX)",
    )
    parser.add_argument(
        "--timeframe",
        type=str,
        choices=list(_TIMEFRAME_BY_NAME),
        required=True,
        help="Timeframe",
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Output directory",
    )

    # Optional arguments for API download
    parser.add_argument(
        "--start",
        type=str,
        help="Start date (YYYY-MM-DD) for API download",
    )
    parser.add_argument(
        "--end",
        type=str,
        help="End date (YYYY-MM-DD) for API download",
    )
    return parser


def main() -> int:
    """
    Main entry point.

    Returns:
        Process exit code: 0 when the CSV conversion succeeded, 1 when the
        download failed or the (unimplemented) API source was requested.
        Invalid command-line arguments terminate the process through
        ``parser.error`` instead of returning.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Validate argument combinations before any filesystem side effect, and
    # outside the try block: parser.error() exits via SystemExit, which is
    # not an error to be reported as a failed download.
    if args.api and not (args.start and args.end):
        parser.error("--start and --end are required for API download")

    try:
        # Convert timeframe string to enum
        timeframe = _TIMEFRAME_BY_NAME[args.timeframe]

        # Create output directory
        output_dir = Path(args.output)
        output_dir.mkdir(parents=True, exist_ok=True)

        if args.api:
            download_from_api(
                args.symbol,
                timeframe,
                args.start,
                args.end,
                output_dir,
            )
            # The API source is a placeholder that writes no data, so do not
            # report success; a non-zero status stops pipelines from
            # continuing with missing data.
            return 1

        # The input group is required and mutually exclusive, so reaching
        # this point means --input-file was given. An empty value is passed
        # through to the loader rather than being silently skipped.
        logger.info(f"Downloading from CSV: {args.input_file}")
        download_from_csv(args.input_file, args.symbol, timeframe, output_dir)

        logger.info("Data download completed successfully")
        return 0

    except Exception as e:
        # Top-level boundary: log with traceback and map to a failing status.
        logger.error(f"Data download failed: {e}", exc_info=True)
        return 1


if __name__ == "__main__":
    sys.exit(main())