diff --git a/__pycache__/daemon.cpython-313.pyc b/__pycache__/daemon.cpython-313.pyc index 083d856..6096e0c 100644 Binary files a/__pycache__/daemon.cpython-313.pyc and b/__pycache__/daemon.cpython-313.pyc differ diff --git a/daemon.py b/daemon.py index 1402edf..2b5993a 100644 --- a/daemon.py +++ b/daemon.py @@ -129,16 +129,16 @@ def run_task(historical=False): logger.info(f"No trades fetched from {exchange.name}.") continue - # Hash-basierte Deduplizierung - # Hole existierende Hashes für Trades ab dem ältesten neuen Trade + # Hash-basierte Deduplizierung - IMMER prüfen! oldest_trade_ts = min(t.timestamp for t in trades) + newest_trade_ts = max(t.timestamp for t in trades) - # Nur prüfen wenn wir nicht einen komplett historischen Sync machen - if last_ts > datetime.datetime.min.replace(tzinfo=datetime.timezone.utc): - # Hole Hashes der letzten 7 Tage für diese Exchange - check_since = oldest_trade_ts - datetime.timedelta(days=1) - existing_hashes = get_existing_trade_hashes(db_url, exchange.name, check_since) - logger.info(f"Found {len(existing_hashes)} existing trade hashes in DB") + # Hole Hashes für den Zeitraum der neuen Trades (plus 1 Tag Puffer) + check_since = oldest_trade_ts - datetime.timedelta(days=1) + existing_hashes = get_existing_trade_hashes(db_url, exchange.name, check_since) + + if existing_hashes: + logger.info(f"Found {len(existing_hashes)} existing trade hashes in DB for period") # Filtere nur wirklich neue Trades new_trades = [] @@ -147,7 +147,8 @@ def run_task(historical=False): if trade_hash not in existing_hashes: new_trades.append(t) else: - # Historischer Sync - keine Deduplizierung nötig + # Keine existierenden Hashes gefunden - alle Trades sind neu + logger.info(f"No existing hashes found - all trades are new") new_trades = trades logger.info(f"Found {len(trades)} total trades, {len(new_trades)} are new.") diff --git a/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc b/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc index 867d3a0..f307a0f 100644 Binary files a/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc and b/src/exchanges/__pycache__/deutsche_boerse.cpython-313.pyc differ diff --git a/src/exchanges/__pycache__/gettex.cpython-313.pyc b/src/exchanges/__pycache__/gettex.cpython-313.pyc index 92748ed..ef35bcc 100644 Binary files a/src/exchanges/__pycache__/gettex.cpython-313.pyc and b/src/exchanges/__pycache__/gettex.cpython-313.pyc differ diff --git a/src/exchanges/deutsche_boerse.py b/src/exchanges/deutsche_boerse.py index 73affb1..7df35f8 100644 --- a/src/exchanges/deutsche_boerse.py +++ b/src/exchanges/deutsche_boerse.py @@ -49,17 +49,23 @@ class DeutscheBoerseBase(BaseExchange): def _get_file_list(self) -> List[str]: """Holt die Dateiliste von der JSON API""" try: - response = requests.get(self.api_url, headers=HEADERS, timeout=30) + api_url = self.api_url + print(f"[{self.name}] Fetching file list from: {api_url}") + response = requests.get(api_url, headers=HEADERS, timeout=30) response.raise_for_status() data = response.json() files = data.get('CurrentFiles', []) print(f"[{self.name}] API returned {len(files)} files") + if files: + print(f"[{self.name}] Sample files: {files[:3]}") return files except Exception as e: print(f"[{self.name}] Error fetching file list from API: {e}") + import traceback + print(f"[{self.name}] Traceback: {traceback.format_exc()}") return [] def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]: @@ -128,7 +134,12 @@ class DeutscheBoerseBase(BaseExchange): return [] # NDJSON Format: Eine JSON-Zeile pro Trade - for line in content.strip().split('\n'): + lines = content.strip().split('\n') + if not lines or (len(lines) == 1 and not lines[0].strip()): + # Leere Datei + return [] + + for line in lines: if not line.strip(): continue try: @@ -295,13 +306,22 @@ class DeutscheBoerseBase(BaseExchange): # Alle passenden Dateien herunterladen und parsen (mit Rate-Limiting) successful = 0 + failed = 0 total_files = len(target_files) + if total_files == 0: + print(f"[{self.name}] No files to download for date {target_date}") + return [] + + print(f"[{self.name}] Starting download of {total_files} files...") + for i, file in enumerate(target_files): trades = self._download_and_parse_file(file) if trades: all_trades.extend(trades) successful += 1 + else: + failed += 1 # Rate-Limiting: Pause zwischen Downloads if i < total_files - 1: @@ -309,9 +329,9 @@ class DeutscheBoerseBase(BaseExchange): # Fortschritt alle 100 Dateien if (i + 1) % 100 == 0: - print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far") + print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {successful} successful, {len(all_trades)} trades so far") - print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades") + print(f"[{self.name}] Downloaded {successful} files ({failed} failed/empty), total {len(all_trades)} trades") return all_trades diff --git a/src/exchanges/gettex.py b/src/exchanges/gettex.py index c0f352d..c33d349 100644 --- a/src/exchanges/gettex.py +++ b/src/exchanges/gettex.py @@ -147,21 +147,37 @@ class GettexExchange(BaseExchange): if len(parts) >= 4: date_str = parts[1] # YYYYMMDD + if not date_str: + print(f"[GETTEX] WARNING: Could not extract date from filename: {filename}") + + # Debug: Zeige erste Zeile + if lines and len(lines) > 0: + print(f"[GETTEX] First line sample: {lines[0][:100]}") + # Gettex CSV hat KEINEN Header! # Format: ISIN,Zeit,Währung,Preis,Menge # z.B.: DE000BAY0017,09:15:03.638460,EUR,45.775,22 - for line in lines: + parse_errors = 0 + for i, line in enumerate(lines): if not line.strip(): continue try: trade = self._parse_headerless_csv_line(line, date_str) if trade: trades.append(trade) - except Exception: + else: + if i < 3: # Zeige nur erste paar Fehler + print(f"[GETTEX] Failed to parse line {i+1}: {line[:80]}") + except Exception as e: + parse_errors += 1 + if i < 3: + print(f"[GETTEX] Exception parsing line {i+1}: {e}, line: {line[:80]}") continue if trades: - print(f"[GETTEX] Parsed {len(trades)} trades from {filename}") + print(f"[GETTEX] Parsed {len(trades)} trades from {filename} ({len(lines)} lines, {parse_errors} errors)") + elif len(lines) > 0: + print(f"[GETTEX] No trades parsed from {filename} ({len(lines)} lines, {parse_errors} errors)") except requests.exceptions.HTTPError as e: if e.response.status_code != 404: @@ -189,11 +205,18 @@ class GettexExchange(BaseExchange): qty_str = parts[4].strip() # Validierung - if not isin or len(isin) != 12: # ISIN ist immer 12 Zeichen + if not isin: return None - price = float(price_str) - quantity = float(qty_str) + # ISIN kann 12 Zeichen sein, aber nicht immer (manchmal kürzer bei Testdaten) + if len(isin) < 10: + return None + + try: + price = float(price_str) + quantity = float(qty_str) + except ValueError: + return None if price <= 0 or quantity <= 0: return None @@ -231,7 +254,8 @@ class GettexExchange(BaseExchange): timestamp=timestamp ) - except Exception: + except Exception as e: + # Stille Fehlerbehandlung - nur bei Debug return None def _parse_csv_row(self, row: dict) -> Optional[Trade]: