import time
import logging
import datetime
import hashlib
import os
import gc

import requests

from src.exchanges.eix import EIXExchange
from src.exchanges.ls import LSExchange
from src.exchanges.deutsche_boerse import XetraExchange, FrankfurtExchange, QuotrixExchange
from src.exchanges.gettex import GettexExchange
from src.exchanges.stuttgart import StuttgartExchange
from src.exchanges.boersenag import (
    DUSAExchange, DUSBExchange, DUSCExchange, DUSDExchange,
    HAMAExchange, HAMBExchange, HANAExchange, HANBExchange
)
from src.database.questdb_client import DatabaseClient

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("TradingDaemon")

DB_USER = os.getenv("DB_USER", "admin")
DB_PASSWORD = os.getenv("DB_PASSWORD", "quest")
DB_AUTH = (DB_USER, DB_PASSWORD) if DB_USER and DB_PASSWORD else None

# Cache of existing trades per day (cleared after each exchange)
_existing_trades_cache = {}


def get_trade_hash(trade):
    """Builds a unique hash for a trade.

    Note: currently unused; deduplication below compares plain tuples instead.
    """
    key = f"{trade.exchange}|{trade.isin}|{trade.timestamp.isoformat()}|{trade.price}|{trade.quantity}"
    return hashlib.md5(key.encode()).hexdigest()


def get_existing_trades_for_day(db_url, exchange_name, day):
    """Fetches a day's existing trades from the DB (with caching)."""
    cache_key = f"{exchange_name}_{day.strftime('%Y-%m-%d')}"
    if cache_key in _existing_trades_cache:
        return _existing_trades_cache[cache_key]

    day_start_str = day.strftime('%Y-%m-%dT%H:%M:%S.000000Z')
    day_end = day + datetime.timedelta(days=1)
    day_end_str = day_end.strftime('%Y-%m-%dT%H:%M:%S.000000Z')

    query = f"""
    SELECT isin, timestamp, price, quantity FROM trades
    WHERE exchange = '{exchange_name}'
      AND timestamp >= '{day_start_str}'
      AND timestamp < '{day_end_str}'
    """

    existing_trades = set()
    try:
        response = requests.get(f"{db_url}/exec", params={'query': query}, auth=DB_AUTH, timeout=60)
        if response.status_code == 200:
            data = response.json()
            if data.get('dataset'):
                for row in data['dataset']:
                    isin, ts, price, qty = row
                    if isinstance(ts, str):
                        ts_dt = datetime.datetime.fromisoformat(ts.replace('Z', '+00:00'))
                    else:
                        # Numeric timestamps are microseconds since epoch
                        ts_dt = datetime.datetime.fromtimestamp(ts / 1000000, tz=datetime.timezone.utc)
                    key = (isin, ts_dt.isoformat(), float(price), float(qty))
                    existing_trades.add(key)
    except Exception as e:
        logger.warning(f"Error fetching existing trades for {day}: {e}")

    _existing_trades_cache[cache_key] = existing_trades
    return existing_trades


def clear_trades_cache():
    """Clears the existing-trades cache."""
    global _existing_trades_cache
    _existing_trades_cache = {}
    gc.collect()


def filter_new_trades_for_day(db_url, exchange_name, trades, day):
    """Returns the trades that are not yet stored for a single day."""
    if not trades:
        return []

    existing = get_existing_trades_for_day(db_url, exchange_name, day)

    new_trades = []
    for trade in trades:
        trade_key = (trade.isin, trade.timestamp.isoformat(), float(trade.price), float(trade.quantity))
        if trade_key not in existing:
            new_trades.append(trade)

    return new_trades
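
# Note on deduplication: get_existing_trades_for_day() and
# filter_new_trades_for_day() must build identical tuple keys of the form
# (isin, ISO-8601 timestamp, price, quantity). Illustrative key with
# hypothetical values:
#   ("DE0007164600", "2024-01-02T09:00:00+00:00", 123.45, 10.0)
# This relies on float equality, i.e. it assumes the DB round-trips price
# and quantity exactly as they were ingested.
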
def filter_new_trades_batch(db_url, exchange_name, trades, batch_size=5000):
    """Filters new trades grouped by day.

    `batch_size` is accepted for API compatibility but currently unused;
    work is chunked per calendar day instead.
    """
    if not trades:
        return []

    # Group all trades by day
    trades_by_day = {}
    for trade in trades:
        day = trade.timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
        if day not in trades_by_day:
            trades_by_day[day] = []
        trades_by_day[day].append(trade)

    new_trades = []
    total_days = len(trades_by_day)

    for i, (day, day_trades) in enumerate(sorted(trades_by_day.items()), 1):
        if i % 10 == 0 or i == 1:
            logger.info(f"Checking day {i}/{total_days}: {day.strftime('%Y-%m-%d')} ({len(day_trades)} trades)...")

        new_for_day = filter_new_trades_for_day(db_url, exchange_name, day_trades, day)
        new_trades.extend(new_for_day)

        # Small pause so the DB is not overloaded
        if i < total_days:
            time.sleep(0.02)

    return new_trades


def get_last_trade_timestamp(db_url, exchange_name):
    # QuestDB query: get the latest timestamp for a specific exchange
    query = f"select max(timestamp) from trades where exchange = '{exchange_name}'"
    try:
        # Use the /exec endpoint to run the query
        response = requests.get(f"{db_url}/exec", params={'query': query}, auth=DB_AUTH, timeout=60)
        if response.status_code == 200:
            data = response.json()
            if data['dataset'] and data['dataset'][0][0] is not None:
                # QuestDB returns the designated timestamp either as an ISO
                # string or as microseconds since epoch, depending on the view.
                ts_value = data['dataset'][0][0]
                if isinstance(ts_value, str):
                    return datetime.datetime.fromisoformat(ts_value.replace('Z', '+00:00'))
                else:
                    return datetime.datetime.fromtimestamp(ts_value / 1000000, tz=datetime.timezone.utc)
    except Exception as e:
        logger.debug(f"No existing data for {exchange_name} or DB unreachable: {e}")
    return datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)


def process_eix_streaming(db, db_url, eix, historical=False):
    """Processes EIX in streaming mode to save RAM."""
    last_ts = get_last_trade_timestamp(db_url, eix.name)
    logger.info(f"Fetching data from EIX (Last trade: {last_ts}) - STREAMING MODE...")

    # Get the list of files to process
    if historical:
        files = eix.get_files_to_process(limit=None, since_date=None)
    else:
        files = eix.get_files_to_process(limit=None, since_date=last_ts)

    if not files:
        logger.info("No EIX files to process.")
        return

    logger.info(f"Found {len(files)} EIX files to process...")

    total_new = 0
    total_processed = 0

    for i, file_item in enumerate(files, 1):
        file_name = file_item.get('fileName', 'unknown').split('/')[-1]
        logger.info(f"Processing EIX file {i}/{len(files)}: {file_name}")

        # Load one file at a time
        trades = eix.fetch_trades_from_file(file_item)
        if not trades:
            logger.info(f"  No trades in file {file_name}")
            continue

        total_processed += len(trades)
        logger.info(f"  Loaded {len(trades)} trades, filtering duplicates...")

        # Filter out duplicates
        new_trades = filter_new_trades_batch(db_url, eix.name, trades, batch_size=5000)

        if new_trades:
            new_trades.sort(key=lambda x: x.timestamp)
            db.save_trades(new_trades)
            total_new += len(new_trades)
            logger.info(f"  Saved {len(new_trades)} new trades (total new: {total_new})")
        else:
            logger.info("  No new trades in this file")

        # Free memory
        del trades
        del new_trades
        gc.collect()

        # Short pause between files
        time.sleep(0.1)

    logger.info(f"EIX complete: {total_new} new trades from {total_processed} total processed.")
    clear_trades_cache()
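
# Processing strategy: EIX is streamed file by file above because its
# historical dumps are large; the remaining exchanges below return result
# sets small enough to be fetched, deduplicated, and stored in one pass.
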
def run_task(historical=False):
    logger.info(f"Starting Trading Data Fetcher task (Historical: {historical})...")
    db = DatabaseClient(host="questdb", user=DB_USER, password=DB_PASSWORD)
    db_url = "http://questdb:9000"

    # === EIX - streaming processing ===
    try:
        eix = EIXExchange()
        process_eix_streaming(db, db_url, eix, historical=historical)
    except Exception as e:
        logger.error(f"Error processing EIX: {e}")

    # === Other exchanges - regular processing ===
    ls = LSExchange()

    # New Deutsche Börse exchanges
    xetra = XetraExchange()
    frankfurt = FrankfurtExchange()
    quotrix = QuotrixExchange()
    gettex = GettexExchange()
    stuttgart = StuttgartExchange()

    # Börsenag exchanges (Düsseldorf, Hamburg, Hannover)
    dusa = DUSAExchange()
    dusb = DUSBExchange()
    dusc = DUSCExchange()
    dusd = DUSDExchange()
    hama = HAMAExchange()
    hamb = HAMBExchange()
    hana = HANAExchange()
    hanb = HANBExchange()

    # All other exchanges (smaller data volumes)
    exchanges_to_process = [
        (ls, {'include_yesterday': historical}),
        # Deutsche Börse exchanges
        (xetra, {'include_yesterday': historical}),
        (frankfurt, {'include_yesterday': historical}),
        (quotrix, {'include_yesterday': historical}),
        (gettex, {'include_yesterday': historical}),
        (stuttgart, {'include_yesterday': historical}),
        # Börsenag exchanges (Düsseldorf, Hamburg, Hannover)
        (dusa, {'include_yesterday': historical}),
        (dusb, {'include_yesterday': historical}),
        (dusc, {'include_yesterday': historical}),
        (dusd, {'include_yesterday': historical}),
        (hama, {'include_yesterday': historical}),
        (hamb, {'include_yesterday': historical}),
        (hana, {'include_yesterday': historical}),
        (hanb, {'include_yesterday': historical}),
    ]

    for exchange, args in exchanges_to_process:
        try:
            last_ts = get_last_trade_timestamp(db_url, exchange.name)
            logger.info(f"Fetching data from {exchange.name} (Last trade: {last_ts})...")
            trades = exchange.fetch_latest_trades(**args)

            if not trades:
                logger.info(f"No trades fetched from {exchange.name}.")
                continue

            # Deduplication
            logger.info(f"Filtering {len(trades)} trades for duplicates...")
            new_trades = filter_new_trades_batch(db_url, exchange.name, trades, batch_size=5000)
            logger.info(f"Found {len(trades)} total trades, {len(new_trades)} are new.")

            if new_trades:
                new_trades.sort(key=lambda x: x.timestamp)
                db.save_trades(new_trades)
                logger.info(f"Stored {len(new_trades)} new trades in QuestDB.")

            # Free memory after each exchange
            del trades, new_trades
            clear_trades_cache()
            gc.collect()
        except Exception as e:
            logger.error(f"Error processing exchange {exchange.name}: {e}")

    logger.info("All exchanges processed.")


def main():
    logger.info("Trading Daemon started.")

    # 1. Startup check: is the DB empty?
    db_url = "http://questdb:9000"
    is_empty = True
    try:
        # Check whether the trades table already contains rows
        response = requests.get(f"{db_url}/exec", params={'query': 'select count(*) from trades'}, auth=DB_AUTH, timeout=60)
        if response.status_code == 200:
            data = response.json()
            if data['dataset'] and data['dataset'][0][0] > 0:
                is_empty = False
    except Exception:
        # Table doesn't exist yet or the DB is unreachable
        is_empty = True

    if is_empty:
        logger.info("Database is empty or table doesn't exist. Triggering initial historical fetch...")
        run_task(historical=True)
    else:
        logger.info("Found existing data in database. Triggering catch-up sync...")
        # Run a normal task to fetch any missing data since the last run
        run_task(historical=False)
        logger.info("Catch-up sync completed. Waiting for scheduled run at 23:00.")

    while True:
        now = datetime.datetime.now()
        # Daily at 23:00
        if now.hour == 23 and now.minute == 0:
            run_task(historical=False)
            # Wait 61s to prevent running twice within the same minute
            time.sleep(61)
        # Poll every 30 seconds
        time.sleep(30)


if __name__ == "__main__":
    main()