diff --git a/src/exchanges/deutsche_boerse.py b/src/exchanges/deutsche_boerse.py
index 189a621..6c386b3 100644
--- a/src/exchanges/deutsche_boerse.py
+++ b/src/exchanges/deutsche_boerse.py
@@ -34,6 +34,10 @@ class DeutscheBoerseBase(BaseExchange):
             response.raise_for_status()
 
             files = []
+            html_text = response.text
+
+            # Debug: response length
+            print(f"[{self.name}] Response length: {len(html_text)} chars")
 
             # Primary: regex-based extraction (more reliable)
             # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
@@ -47,13 +51,16 @@ class DeutscheBoerseBase(BaseExchange):
 
             # Generic pattern
             pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
-            matches = re.findall(pattern, response.text)
+            matches = re.findall(pattern, html_text)
             files = list(set(matches))
 
             # Secondary: BeautifulSoup link scraping (if the regex finds nothing)
             if not files:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                for link in soup.find_all('a'):
+                soup = BeautifulSoup(html_text, 'html.parser')
+                all_links = soup.find_all('a')
+                print(f"[{self.name}] Found {len(all_links)} total links on page")
+
+                for link in all_links:
                     href = link.get('href', '')
                     text = link.get_text(strip=True)
@@ -65,6 +72,15 @@ class DeutscheBoerseBase(BaseExchange):
                     elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
                         files.append(text)
 
+            # Tertiary: search the raw HTML for any "posttrade" occurrence and extract filenames
+            if not files:
+                # Looser pattern for arbitrary filenames containing "posttrade"
+                general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
+                matches = re.findall(general_pattern, html_text, re.IGNORECASE)
+                files = list(set(matches))
+                if files:
+                    print(f"[{self.name}] Found {len(files)} files via general pattern")
+
             print(f"[{self.name}] Found {len(files)} files via regex/soup")
             return files
         except Exception as e:
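The listing scrape now degrades through three tiers: the strict `PREFIX-posttrade-...` regex, anchor-tag scraping via BeautifulSoup, and finally the case-insensitive catch-all pattern. A minimal sketch of tiers one and three, run against an invented listing snippet (the real page markup is an assumption here):

```python
import re

# Hypothetical listing snippet; the actual page markup is an assumption.
sample_html = '''
<a href="/files/DETR-posttrade-2024-05-14T08_30.json.gz">DETR-posttrade-2024-05-14T08_30.json.gz</a>
<span>detr-posttrade-2024-05-14T09_00.json.gz</span>
'''

# Tier 1: the strict pattern from the diff.
strict = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
files = set(re.findall(strict, sample_html))

# Tier 3: the looser, case-insensitive fallback from the diff.
loose = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
if not files:
    files = set(re.findall(loose, sample_html, re.IGNORECASE))

print(sorted(files))  # ['DETR-posttrade-2024-05-14T08_30.json.gz']
```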
@@ -233,6 +249,34 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
             return None
 
+    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
+        """
+        Generates expected filenames based on the known format.
+        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
+        """
+        import re
+        files = []
+
+        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
+        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
+        prefix = prefix_match.group(1) if prefix_match else 'DETR'
+
+        date_str = target_date.strftime('%Y-%m-%d')
+
+        # Generate one name per minute of the trading day (07:00 - 22:59 UTC)
+        for hour in range(7, 23):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        # Also cover early files from the following day (after midnight UTC)
+        next_date = target_date + timedelta(days=1)
+        next_date_str = next_date.strftime('%Y-%m-%d')
+        for hour in range(0, 3):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        return files
+
     def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
         """
         Fetches all trades from the previous day (or since since_date).
@@ -248,7 +292,7 @@
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # Fetch the file list
+        # First try to fetch the file list from the page
         files = self._get_file_list()
         print(f"[{self.name}] Found {len(files)} total files")
 
@@ -256,11 +300,24 @@
         target_files = self._filter_files_for_date(files, target_date)
         print(f"[{self.name}] {len(target_files)} files match target date")
 
+        # If the page yielded no files, fall back to generated filenames
+        if not target_files:
+            print(f"[{self.name}] No files from page, trying generated filenames...")
+            target_files = self._generate_expected_files(target_date)
+            print(f"[{self.name}] Trying {len(target_files)} potential files")
+
         # Download and parse all matching files
+        successful = 0
         for file in target_files:
             trades = self._download_and_parse_file(file)
-            all_trades.extend(trades)
-            print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+            if trades:
+                all_trades.extend(trades)
+                successful += 1
+                if successful <= 5:
+                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+
+        if successful > 5:
+            print(f"[{self.name}] ... and {successful - 5} more files")
 
         print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
         return all_trades
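When the listing page yields nothing, `_generate_expected_files` brute-forces one candidate name per minute of the trading window, and the caller then probes each one, tolerating 404s. A standalone sketch of the same enumeration, showing its cost (the `expected_files` helper name is illustrative):

```python
from datetime import date, timedelta

def expected_files(prefix: str, target: date) -> list[str]:
    """Mirror of the fallback: every minute 07:00-22:59 UTC, plus 00:00-02:59 next day."""
    names = [f"{prefix}-posttrade-{target:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
             for h in range(7, 23) for m in range(60)]
    nxt = target + timedelta(days=1)
    names += [f"{prefix}-posttrade-{nxt:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
              for h in range(3) for m in range(60)]
    return names

candidates = expected_files("DETR", date(2024, 5, 14))
print(len(candidates))  # 1140 candidate names, most of which will 404
```

That is up to 1,140 HTTP probes per venue per day in the worst case, which is why the download loop only logs the first five successful files.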
diff --git a/src/exchanges/gettex.py b/src/exchanges/gettex.py
index 84ac844..7a15e69 100644
--- a/src/exchanges/gettex.py
+++ b/src/exchanges/gettex.py
@@ -133,17 +133,32 @@ class GettexExchange(BaseExchange):
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
                 csv_text = f.read().decode('utf-8')
 
-            # Parse CSV
-            reader = csv.DictReader(io.StringIO(csv_text), delimiter=';')
+            # Debug: show the first lines and columns
+            lines = csv_text.strip().split('\n')
+            if lines:
+                print(f"[GETTEX] CSV has {len(lines)} lines, first line (headers): {lines[0][:200]}")
+                if len(lines) > 1:
+                    print(f"[GETTEX] Sample data row: {lines[1][:200]}")
 
+            # Parse CSV - try different delimiters
+            delimiter = ';' if ';' in lines[0] else ','
+            reader = csv.DictReader(io.StringIO(csv_text), delimiter=delimiter)
+
+            row_count = 0
             for row in reader:
+                row_count += 1
+                if row_count == 1:
+                    print(f"[GETTEX] CSV columns: {list(row.keys())}")
                 try:
                     trade = self._parse_csv_row(row)
                     if trade:
                         trades.append(trade)
                 except Exception as e:
-                    print(f"[GETTEX] Error parsing row: {e}")
+                    if row_count <= 3:
+                        print(f"[GETTEX] Error parsing row {row_count}: {e}, row keys: {list(row.keys())}")
                     continue
+
+            print(f"[GETTEX] Processed {row_count} rows, found {len(trades)} valid trades")
 
         except requests.exceptions.HTTPError as e:
             if e.response.status_code != 404:
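Both download paths now guess the delimiter by peeking at the header line for `';'` before falling back to `','` (or `'\t'` in the second path). A small sketch of that heuristic, with `csv.Sniffer` added as a last resort; the Sniffer step is an extension, not part of this diff:

```python
import csv
import io

def make_reader(csv_text: str) -> csv.DictReader:
    """Guess the delimiter as the diff does, then fall back to csv.Sniffer."""
    header = csv_text.split('\n', 1)[0]
    for cand in (';', ',', '\t'):
        if cand in header:
            return csv.DictReader(io.StringIO(csv_text), delimiter=cand)
    # Last resort; raises csv.Error if the dialect cannot be determined.
    dialect = csv.Sniffer().sniff(header)
    return csv.DictReader(io.StringIO(csv_text), dialect=dialect)

reader = make_reader("ISIN;Pric;Qty\nDE0005140008;15.5;100\n")
print(next(iter(reader)))  # {'ISIN': 'DE0005140008', 'Pric': '15.5', 'Qty': '100'}
```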
@@ -157,48 +172,102 @@ class GettexExchange(BaseExchange):
         """
         Parses a CSV row into a Trade.
 
-        Expected columns (RTS format):
-        - TrdDtTm: trading date/time
-        - ISIN: instrument identifier
-        - Pric: price
-        - Qty: quantity
-        - Ccy: currency
+        Supported columns (RTS1/RTS2 format, several variants):
+        - ISIN / FinInstrmId / Isin: instrument identifier
+        - Pric / Price / pric: price
+        - Qty / Quantity / qty: quantity
+        - TrdDtTm / TradingDateTime / TrdgDtTm: trading date/time
+        - TrdDt / TradingDate: trading date
+        - TrdTm / TradingTime: trading time
         """
         try:
-            # ISIN
-            isin = row.get('ISIN', row.get('FinInstrmId', ''))
+            # ISIN - try several column names
+            isin = None
+            for key in ['ISIN', 'Isin', 'isin', 'FinInstrmId', 'FinInstrmId.Id', 'Id']:
+                if key in row and row[key]:
+                    isin = str(row[key]).strip()
+                    break
+
             if not isin:
                 return None
 
-            # Price
-            price_str = row.get('Pric', row.get('Price', '0'))
-            price_str = price_str.replace(',', '.')
-            price = float(price_str)
+            # Price - try several column names
+            price = None
+            for key in ['Pric', 'Price', 'pric', 'price', 'Pric.Pric.MntryVal.Amt', 'TradPric']:
+                if key in row and row[key]:
+                    price_str = str(row[key]).replace(',', '.').strip()
+                    try:
+                        price = float(price_str)
+                        if price > 0:
+                            break
+                    except ValueError:
+                        continue
 
-            if price <= 0:
+            if not price or price <= 0:
                 return None
 
-            # Quantity
-            qty_str = row.get('Qty', row.get('Quantity', '0'))
-            qty_str = qty_str.replace(',', '.')
-            quantity = float(qty_str)
+            # Quantity - try several column names
+            quantity = None
+            for key in ['Qty', 'Quantity', 'qty', 'quantity', 'TradQty', 'Qty.Unit']:
+                if key in row and row[key]:
+                    qty_str = str(row[key]).replace(',', '.').strip()
+                    try:
+                        quantity = float(qty_str)
+                        if quantity > 0:
+                            break
+                    except ValueError:
+                        continue
 
-            if quantity <= 0:
+            if not quantity or quantity <= 0:
                 return None
 
-            # Timestamp
-            ts_str = row.get('TrdDtTm', row.get('TradingDateTime', ''))
+            # Timestamp - try several formats
+            ts_str = None
+
+            # Try the combined date/time field first
+            for key in ['TrdDtTm', 'TradingDateTime', 'TrdgDtTm', 'Timestamp', 'timestamp']:
+                if key in row and row[key]:
+                    ts_str = str(row[key]).strip()
+                    break
+
+            # If not found, combine separate date and time fields
             if not ts_str:
-                # Fallback: separate fields
-                trd_dt = row.get('TrdDt', '')
-                trd_tm = row.get('TrdTm', '00:00:00')
-                ts_str = f"{trd_dt}T{trd_tm}"
+                trd_dt = None
+                trd_tm = '00:00:00'
+
+                for key in ['TrdDt', 'TradingDate', 'Date', 'date']:
+                    if key in row and row[key]:
+                        trd_dt = str(row[key]).strip()
+                        break
+
+                for key in ['TrdTm', 'TradingTime', 'Time', 'time']:
+                    if key in row and row[key]:
+                        trd_tm = str(row[key]).strip()
+                        break
+
+                if trd_dt:
+                    ts_str = f"{trd_dt}T{trd_tm}"
+
+            if not ts_str:
+                return None
 
             # Parse the timestamp (UTC)
             ts_str = ts_str.replace('Z', '+00:00')
             if 'T' not in ts_str:
                 ts_str = ts_str.replace(' ', 'T')
 
+            # Trim fractional seconds to six digits so fromisoformat accepts them
+            if '.' in ts_str:
+                parts = ts_str.split('.')
+                if len(parts) > 1:
+                    ms_part = parts[1].split('+')[0].split('-')[0]
+                    if len(ms_part) > 6:
+                        ts_str = parts[0] + '.' + ms_part[:6]
+                        if '+' in parts[1]:
+                            ts_str += '+' + parts[1].split('+')[1]
+                        elif '-' in parts[1][1:]:
+                            ts_str += '-' + parts[1].split('-')[-1]
+
             timestamp = datetime.fromisoformat(ts_str)
             if timestamp.tzinfo is None:
                 timestamp = timestamp.replace(tzinfo=timezone.utc)
@@ -213,7 +282,7 @@
             )
 
         except Exception as e:
-            print(f"[GETTEX] Error parsing CSV row: {e}")
+            # Errors are logged by the caller, and only for the first few rows
             return None
 
     def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
@@ -289,7 +358,6 @@
         trades = []
 
         try:
-            print(f"[{self.name}] Downloading: {url}")
             response = requests.get(url, headers=HEADERS, timeout=60)
 
             if response.status_code == 404:
@@ -301,16 +369,26 @@
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
                 csv_text = f.read().decode('utf-8')
 
-            # Parse CSV
-            reader = csv.DictReader(io.StringIO(csv_text), delimiter=';')
+            # Debug: inspect the first lines
+            lines = csv_text.strip().split('\n')
+            if len(lines) <= 1:
+                # File is empty or header-only
+                return []
 
+            # Parse CSV - try different delimiters
+            delimiter = ';' if ';' in lines[0] else (',' if ',' in lines[0] else '\t')
+            reader = csv.DictReader(io.StringIO(csv_text), delimiter=delimiter)
+
+            row_count = 0
             for row in reader:
+                row_count += 1
                 try:
                     trade = self._parse_csv_row(row)
                     if trade:
                         trades.append(trade)
                 except Exception as e:
-                    print(f"[{self.name}] Error parsing row: {e}")
+                    if row_count <= 2:
+                        print(f"[{self.name}] Error parsing row: {e}, keys: {list(row.keys())[:5]}")
                     continue
 
             print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
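The fractional-second trimming exists because `datetime.fromisoformat` on Python 3.10 and earlier rejects the `'Z'` suffix and fractional seconds longer than six digits. A compact sketch of the same normalization (the `normalize_iso` helper name and the sample value are illustrative):

```python
from datetime import datetime, timezone

def normalize_iso(ts: str) -> datetime:
    """Normalize vendor timestamps for fromisoformat on Python <= 3.10:
    map 'Z' to an explicit offset and cap fractional seconds at 6 digits."""
    ts = ts.replace('Z', '+00:00')
    if '.' in ts:
        head, frac = ts.split('.', 1)
        # Split any trailing UTC offset off the fractional part.
        offset = ''
        for sep in ('+', '-'):
            if sep in frac:
                frac, tail = frac.split(sep, 1)
                offset = sep + tail
                break
        ts = f"{head}.{frac[:6]}{offset}"
    dt = datetime.fromisoformat(ts)
    return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

print(normalize_iso('2024-05-14T08:30:00.123456789Z'))
# 2024-05-14 08:30:00.123456+00:00
```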