This commit is contained in:
Binary file not shown.
19
daemon.py
19
daemon.py
@@ -129,16 +129,16 @@ def run_task(historical=False):
|
||||
logger.info(f"No trades fetched from {exchange.name}.")
|
||||
continue
|
||||
|
||||
# Hash-basierte Deduplizierung
|
||||
# Hole existierende Hashes für Trades ab dem ältesten neuen Trade
|
||||
# Hash-basierte Deduplizierung - IMMER prüfen!
|
||||
oldest_trade_ts = min(t.timestamp for t in trades)
|
||||
newest_trade_ts = max(t.timestamp for t in trades)
|
||||
|
||||
# Nur prüfen wenn wir nicht einen komplett historischen Sync machen
|
||||
if last_ts > datetime.datetime.min.replace(tzinfo=datetime.timezone.utc):
|
||||
# Hole Hashes der letzten 7 Tage für diese Exchange
|
||||
check_since = oldest_trade_ts - datetime.timedelta(days=1)
|
||||
existing_hashes = get_existing_trade_hashes(db_url, exchange.name, check_since)
|
||||
logger.info(f"Found {len(existing_hashes)} existing trade hashes in DB")
|
||||
# Hole Hashes für den Zeitraum der neuen Trades (plus 1 Tag Puffer)
|
||||
check_since = oldest_trade_ts - datetime.timedelta(days=1)
|
||||
existing_hashes = get_existing_trade_hashes(db_url, exchange.name, check_since)
|
||||
|
||||
if existing_hashes:
|
||||
logger.info(f"Found {len(existing_hashes)} existing trade hashes in DB for period")
|
||||
|
||||
# Filtere nur wirklich neue Trades
|
||||
new_trades = []
|
||||
@@ -147,7 +147,8 @@ def run_task(historical=False):
|
||||
if trade_hash not in existing_hashes:
|
||||
new_trades.append(t)
|
||||
else:
|
||||
# Historischer Sync - keine Deduplizierung nötig
|
||||
# Keine existierenden Hashes gefunden - alle Trades sind neu
|
||||
logger.info(f"No existing hashes found - all trades are new")
|
||||
new_trades = trades
|
||||
|
||||
logger.info(f"Found {len(trades)} total trades, {len(new_trades)} are new.")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -49,17 +49,23 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
def _get_file_list(self) -> List[str]:
|
||||
"""Holt die Dateiliste von der JSON API"""
|
||||
try:
|
||||
response = requests.get(self.api_url, headers=HEADERS, timeout=30)
|
||||
api_url = self.api_url
|
||||
print(f"[{self.name}] Fetching file list from: {api_url}")
|
||||
response = requests.get(api_url, headers=HEADERS, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
data = response.json()
|
||||
files = data.get('CurrentFiles', [])
|
||||
|
||||
print(f"[{self.name}] API returned {len(files)} files")
|
||||
if files:
|
||||
print(f"[{self.name}] Sample files: {files[:3]}")
|
||||
return files
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Error fetching file list from API: {e}")
|
||||
import traceback
|
||||
print(f"[{self.name}] Traceback: {traceback.format_exc()}")
|
||||
return []
|
||||
|
||||
def _filter_files_for_date(self, files: List[str], target_date: datetime.date) -> List[str]:
|
||||
@@ -128,7 +134,12 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
return []
|
||||
|
||||
# NDJSON Format: Eine JSON-Zeile pro Trade
|
||||
for line in content.strip().split('\n'):
|
||||
lines = content.strip().split('\n')
|
||||
if not lines or (len(lines) == 1 and not lines[0].strip()):
|
||||
# Leere Datei
|
||||
return []
|
||||
|
||||
for line in lines:
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
@@ -295,13 +306,22 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
|
||||
# Alle passenden Dateien herunterladen und parsen (mit Rate-Limiting)
|
||||
successful = 0
|
||||
failed = 0
|
||||
total_files = len(target_files)
|
||||
|
||||
if total_files == 0:
|
||||
print(f"[{self.name}] No files to download for date {target_date}")
|
||||
return []
|
||||
|
||||
print(f"[{self.name}] Starting download of {total_files} files...")
|
||||
|
||||
for i, file in enumerate(target_files):
|
||||
trades = self._download_and_parse_file(file)
|
||||
if trades:
|
||||
all_trades.extend(trades)
|
||||
successful += 1
|
||||
else:
|
||||
failed += 1
|
||||
|
||||
# Rate-Limiting: Pause zwischen Downloads
|
||||
if i < total_files - 1:
|
||||
@@ -309,9 +329,9 @@ class DeutscheBoerseBase(BaseExchange):
|
||||
|
||||
# Fortschritt alle 100 Dateien
|
||||
if (i + 1) % 100 == 0:
|
||||
print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {len(all_trades)} trades so far")
|
||||
print(f"[{self.name}] Progress: {i + 1}/{total_files} files, {successful} successful, {len(all_trades)} trades so far")
|
||||
|
||||
print(f"[{self.name}] Downloaded {successful} files, total {len(all_trades)} trades")
|
||||
print(f"[{self.name}] Downloaded {successful} files ({failed} failed/empty), total {len(all_trades)} trades")
|
||||
return all_trades
|
||||
|
||||
|
||||
|
||||
@@ -147,21 +147,37 @@ class GettexExchange(BaseExchange):
|
||||
if len(parts) >= 4:
|
||||
date_str = parts[1] # YYYYMMDD
|
||||
|
||||
if not date_str:
|
||||
print(f"[GETTEX] WARNING: Could not extract date from filename: {filename}")
|
||||
|
||||
# Debug: Zeige erste Zeile
|
||||
if lines and len(lines) > 0:
|
||||
print(f"[GETTEX] First line sample: {lines[0][:100]}")
|
||||
|
||||
# Gettex CSV hat KEINEN Header!
|
||||
# Format: ISIN,Zeit,Währung,Preis,Menge
|
||||
# z.B.: DE000BAY0017,09:15:03.638460,EUR,45.775,22
|
||||
for line in lines:
|
||||
parse_errors = 0
|
||||
for i, line in enumerate(lines):
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
trade = self._parse_headerless_csv_line(line, date_str)
|
||||
if trade:
|
||||
trades.append(trade)
|
||||
except Exception:
|
||||
else:
|
||||
if i < 3: # Zeige nur erste paar Fehler
|
||||
print(f"[GETTEX] Failed to parse line {i+1}: {line[:80]}")
|
||||
except Exception as e:
|
||||
parse_errors += 1
|
||||
if i < 3:
|
||||
print(f"[GETTEX] Exception parsing line {i+1}: {e}, line: {line[:80]}")
|
||||
continue
|
||||
|
||||
if trades:
|
||||
print(f"[GETTEX] Parsed {len(trades)} trades from {filename}")
|
||||
print(f"[GETTEX] Parsed {len(trades)} trades from {filename} ({len(lines)} lines, {parse_errors} errors)")
|
||||
elif len(lines) > 0:
|
||||
print(f"[GETTEX] No trades parsed from {filename} ({len(lines)} lines, {parse_errors} errors)")
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if e.response.status_code != 404:
|
||||
@@ -189,11 +205,18 @@ class GettexExchange(BaseExchange):
|
||||
qty_str = parts[4].strip()
|
||||
|
||||
# Validierung
|
||||
if not isin or len(isin) != 12: # ISIN ist immer 12 Zeichen
|
||||
if not isin:
|
||||
return None
|
||||
|
||||
price = float(price_str)
|
||||
quantity = float(qty_str)
|
||||
# ISIN kann 12 Zeichen sein, aber nicht immer (manchmal kürzer bei Testdaten)
|
||||
if len(isin) < 10:
|
||||
return None
|
||||
|
||||
try:
|
||||
price = float(price_str)
|
||||
quantity = float(qty_str)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
if price <= 0 or quantity <= 0:
|
||||
return None
|
||||
@@ -231,7 +254,8 @@ class GettexExchange(BaseExchange):
|
||||
timestamp=timestamp
|
||||
)
|
||||
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
# Stille Fehlerbehandlung - nur bei Debug
|
||||
return None
|
||||
|
||||
def _parse_csv_row(self, row: dict) -> Optional[Trade]:
|
||||
|
||||
Reference in New Issue
Block a user