@@ -34,6 +34,10 @@ class DeutscheBoerseBase(BaseExchange):
             response.raise_for_status()
 
             files = []
+            html_text = response.text
+
+            # Debug: response length
+            print(f"[{self.name}] Response length: {len(html_text)} chars")
 
             # Primary: regex-based extraction (more reliable)
             # Pattern: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
@@ -47,13 +51,16 @@ class DeutscheBoerseBase(BaseExchange):
             # Generic pattern
             pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
 
-            matches = re.findall(pattern, response.text)
+            matches = re.findall(pattern, html_text)
             files = list(set(matches))
 
             # Secondary: BeautifulSoup over the links (if the regex finds nothing)
             if not files:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                for link in soup.find_all('a'):
+                soup = BeautifulSoup(html_text, 'html.parser')
+                all_links = soup.find_all('a')
+                print(f"[{self.name}] Found {len(all_links)} total links on page")
+
+                for link in all_links:
                     href = link.get('href', '')
                     text = link.get_text(strip=True)
 
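Side note (not part of the commit): the strict pattern leans on `set()` for deduplication, because each file name typically appears twice in the listing HTML, once in the `href` and once in the link text. A minimal standalone sketch with an invented listing snippet:

```python
import re

# Invented two-entry listing snippet; real pages come from the venue's
# post-trade transparency endpoint.
sample_html = (
    '<a href="DETR-posttrade-2024-01-05T07_00.json.gz">'
    'DETR-posttrade-2024-01-05T07_00.json.gz</a>'
)

pattern = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
print(re.findall(pattern, sample_html))             # name appears twice (href + text)
print(list(set(re.findall(pattern, sample_html))))  # deduped to one entry
```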
@@ -65,6 +72,15 @@ class DeutscheBoerseBase(BaseExchange):
                     elif text and 'posttrade' in text.lower() and '.json.gz' in text.lower():
                         files.append(text)
 
+            # Tertiary: look for any "posttrade" occurrence in the HTML and extract file names
+            if not files:
+                # More general pattern for arbitrary file names containing "posttrade"
+                general_pattern = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'
+                matches = re.findall(general_pattern, html_text, re.IGNORECASE)
+                files = list(set(matches))
+                if files:
+                    print(f"[{self.name}] Found {len(files)} files via general pattern")
+
             print(f"[{self.name}] Found {len(files)} files via regex/soup")
             return files
         except Exception as e:
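To illustrate what the looser tertiary pattern buys (a sketch with an invented file name, not taken from the source): it tolerates lowercase prefixes and underscore-separated dates that the strict `[A-Z]{4}` pattern rejects.

```python
import re

strict = r'[A-Z]{4}-posttrade-\d{4}-\d{2}-\d{2}T\d{2}_\d{2}\.json\.gz'
general = r'[\w-]*posttrade[\w-]*\d{4}[-_]\d{2}[-_]\d{2}[T_]\d{2}[_:]\d{2}\.json\.gz'

# Invented variant: lowercase prefix and underscore-separated date.
name = 'xetra-posttrade-2024_01_05T07_00.json.gz'
print(re.findall(strict, name))                  # [] - the strict pattern misses it
print(re.findall(general, name, re.IGNORECASE))  # the full name matches
```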
@@ -233,6 +249,34 @@ class DeutscheBoerseBase(BaseExchange):
             print(f"Error parsing record: {e}")
             return None
 
+    def _generate_expected_files(self, target_date: datetime.date) -> List[str]:
+        """
+        Generates the expected file names based on the known format.
+        Format: PREFIX-posttrade-YYYY-MM-DDTHH_MM.json.gz
+        """
+        import re
+        files = []
+
+        # Extract the prefix from base_url (e.g. DETR, DFRA, DGAT)
+        prefix_match = re.search(r'/([A-Z]{4})-posttrade', self.base_url)
+        prefix = prefix_match.group(1) if prefix_match else 'DETR'
+
+        date_str = target_date.strftime('%Y-%m-%d')
+
+        # Generate for every hour of the trading day (07:00 - 22:00 UTC, every minute)
+        for hour in range(7, 23):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        # Also early files from the following day (after midnight UTC)
+        next_date = target_date + timedelta(days=1)
+        next_date_str = next_date.strftime('%Y-%m-%d')
+        for hour in range(0, 3):
+            for minute in range(0, 60):
+                files.append(f"{prefix}-posttrade-{next_date_str}T{hour:02d}_{minute:02d}.json.gz")
+
+        return files
+
     def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
         """
         Fetches all trades from the previous day (or since since_date).
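Order-of-magnitude check for the new helper (a standalone sketch mirroring its loops, not the class method itself): 16 trading-day hours plus 3 post-midnight hours at one file per minute yields 1140 candidate names per venue and day, which is why this list is only used as a last resort.

```python
from datetime import date, timedelta

def generate_expected(prefix: str, target: date) -> list:
    # Mirrors _generate_expected_files: every minute 07:00-22:59 UTC,
    # plus 00:00-02:59 UTC of the following day.
    names = [f"{prefix}-posttrade-{target:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
             for h in range(7, 23) for m in range(60)]
    nxt = target + timedelta(days=1)
    names += [f"{prefix}-posttrade-{nxt:%Y-%m-%d}T{h:02d}_{m:02d}.json.gz"
              for h in range(3) for m in range(60)]
    return names

names = generate_expected('DETR', date(2024, 1, 5))
print(len(names), names[0], names[-1])
# 1140 DETR-posttrade-2024-01-05T07_00.json.gz DETR-posttrade-2024-01-06T02_59.json.gz
```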
@@ -248,7 +292,7 @@ class DeutscheBoerseBase(BaseExchange):
 
         print(f"[{self.name}] Fetching trades for date: {target_date}")
 
-        # Fetch the file list
+        # First, try to fetch the file list from the page
         files = self._get_file_list()
         print(f"[{self.name}] Found {len(files)} total files")
 
@@ -256,11 +300,24 @@ class DeutscheBoerseBase(BaseExchange):
         target_files = self._filter_files_for_date(files, target_date)
         print(f"[{self.name}] {len(target_files)} files match target date")
 
+        # If no files were found on the page, generate the expected file names
+        if not target_files:
+            print(f"[{self.name}] No files from page, trying generated filenames...")
+            target_files = self._generate_expected_files(target_date)
+            print(f"[{self.name}] Trying {len(target_files)} potential files")
+
         # Download and parse all matching files
+        successful = 0
         for file in target_files:
             trades = self._download_and_parse_file(file)
-            all_trades.extend(trades)
-            print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+            if trades:
+                all_trades.extend(trades)
+                successful += 1
+                if successful <= 5:
+                    print(f"[{self.name}] Parsed {len(trades)} trades from {file}")
+
+        if successful > 5:
+            print(f"[{self.name}] ... and {successful - 5} more files")
 
         print(f"[{self.name}] Total trades fetched: {len(all_trades)}")
         return all_trades
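The `successful <= 5` cap exists because the generated-name fallback can push over a thousand candidates through this loop, so unconditional per-file prints would flood the log. The pattern in isolation (invented data; `LOG_LIMIT` is a hypothetical name, the diff hardcodes 5):

```python
LOG_LIMIT = 5  # hypothetical constant; the diff hardcodes 5

results = {'file_a': 12, 'file_b': 0, 'file_c': 7}  # invented parse results
successful = 0
for name, n_trades in results.items():
    if n_trades:
        successful += 1
        if successful <= LOG_LIMIT:
            print(f"Parsed {n_trades} trades from {name}")
if successful > LOG_LIMIT:
    print(f"... and {successful - LOG_LIMIT} more files")
```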
@@ -133,17 +133,32 @@ class GettexExchange(BaseExchange):
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
                 csv_text = f.read().decode('utf-8')
 
-            # Parse the CSV
-            reader = csv.DictReader(io.StringIO(csv_text), delimiter=';')
+            # Debug: show the first lines and the columns
+            lines = csv_text.strip().split('\n')
+            if lines:
+                print(f"[GETTEX] CSV has {len(lines)} lines, first line (headers): {lines[0][:200]}")
+                if len(lines) > 1:
+                    print(f"[GETTEX] Sample data row: {lines[1][:200]}")
+
+            # Parse the CSV - try different delimiters
+            delimiter = ';' if ';' in lines[0] else ','
+            reader = csv.DictReader(io.StringIO(csv_text), delimiter=delimiter)
 
+            row_count = 0
             for row in reader:
+                row_count += 1
+                if row_count == 1:
+                    print(f"[GETTEX] CSV columns: {list(row.keys())}")
                 try:
                     trade = self._parse_csv_row(row)
                     if trade:
                         trades.append(trade)
                 except Exception as e:
-                    print(f"[GETTEX] Error parsing row: {e}")
+                    if row_count <= 3:
+                        print(f"[GETTEX] Error parsing row {row_count}: {e}, row keys: {list(row.keys())}")
                     continue
 
+            print(f"[GETTEX] Processed {row_count} rows, found {len(trades)} valid trades")
+
         except requests.exceptions.HTTPError as e:
             if e.response.status_code != 404:
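The delimiter guess here is a simple header check. For reference, the standard library's `csv.Sniffer` generalizes the same idea (a sketch with an invented sample, not what the commit uses):

```python
import csv
import io

sample = "ISIN;Pric;Qty\nDE0007164600;123,45;10\n"  # invented two-line CSV

try:
    dialect = csv.Sniffer().sniff(sample.splitlines()[0], delimiters=';,\t')
    delimiter = dialect.delimiter
except csv.Error:
    delimiter = ';'  # fall back to the venue's usual default

reader = csv.DictReader(io.StringIO(sample), delimiter=delimiter)
print(next(reader))  # {'ISIN': 'DE0007164600', 'Pric': '123,45', 'Qty': '10'}
```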
@@ -157,48 +172,102 @@ class GettexExchange(BaseExchange):
         """
         Parses a CSV row into a Trade.
 
-        Expected columns (RTS format):
-        - TrdDtTm: Trading Date/Time
-        - ISIN: Instrument Identifier
-        - Pric: price
-        - Qty: quantity
-        - Ccy: currency
+        Supported columns (RTS1/RTS2 format, several variants):
+        - ISIN / FinInstrmId / Isin: Instrument Identifier
+        - Pric / Price / pric: price
+        - Qty / Quantity / qty: quantity
+        - TrdDtTm / TradingDateTime / TrdgDtTm: Trading Date/Time
+        - TrdDt / TradingDate: Trading Date
+        - TrdTm / TradingTime: Trading Time
         """
         try:
-            # ISIN
-            isin = row.get('ISIN', row.get('FinInstrmId', ''))
+            # ISIN - try several column names
+            isin = None
+            for key in ['ISIN', 'Isin', 'isin', 'FinInstrmId', 'FinInstrmId.Id', 'Id']:
+                if key in row and row[key]:
+                    isin = str(row[key]).strip()
+                    break
+
             if not isin:
                 return None
 
-            # Price
-            price_str = row.get('Pric', row.get('Price', '0'))
-            price_str = price_str.replace(',', '.')
-            price = float(price_str)
-            if price <= 0:
+            # Price - try several column names
+            price = None
+            for key in ['Pric', 'Price', 'pric', 'price', 'Pric.Pric.MntryVal.Amt', 'TradPric']:
+                if key in row and row[key]:
+                    price_str = str(row[key]).replace(',', '.').strip()
+                    try:
+                        price = float(price_str)
+                        if price > 0:
+                            break
+                    except ValueError:
+                        continue
+
+            if not price or price <= 0:
                 return None
 
-            # Quantity
-            qty_str = row.get('Qty', row.get('Quantity', '0'))
-            qty_str = qty_str.replace(',', '.')
-            quantity = float(qty_str)
-            if quantity <= 0:
+            # Quantity - try several column names
+            quantity = None
+            for key in ['Qty', 'Quantity', 'qty', 'quantity', 'TradQty', 'Qty.Unit']:
+                if key in row and row[key]:
+                    qty_str = str(row[key]).replace(',', '.').strip()
+                    try:
+                        quantity = float(qty_str)
+                        if quantity > 0:
+                            break
+                    except ValueError:
+                        continue
+
+            if not quantity or quantity <= 0:
                 return None
 
-            # Timestamp
-            ts_str = row.get('TrdDtTm', row.get('TradingDateTime', ''))
+            # Timestamp - try several formats
+            ts_str = None
+
+            # Try the combined field first
+            for key in ['TrdDtTm', 'TradingDateTime', 'TrdgDtTm', 'Timestamp', 'timestamp']:
+                if key in row and row[key]:
+                    ts_str = str(row[key]).strip()
+                    break
+
+            # If not found, combine the separate date and time fields
             if not ts_str:
-                # Fallback: separate fields
-                trd_dt = row.get('TrdDt', '')
-                trd_tm = row.get('TrdTm', '00:00:00')
-                ts_str = f"{trd_dt}T{trd_tm}"
+                trd_dt = None
+                trd_tm = '00:00:00'
+
+                for key in ['TrdDt', 'TradingDate', 'Date', 'date']:
+                    if key in row and row[key]:
+                        trd_dt = str(row[key]).strip()
+                        break
+
+                for key in ['TrdTm', 'TradingTime', 'Time', 'time']:
+                    if key in row and row[key]:
+                        trd_tm = str(row[key]).strip()
+                        break
+
+                if trd_dt:
+                    ts_str = f"{trd_dt}T{trd_tm}"
+
+            if not ts_str:
+                return None
 
             # Parse the timestamp (UTC)
             ts_str = ts_str.replace('Z', '+00:00')
             if 'T' not in ts_str:
                 ts_str = ts_str.replace(' ', 'T')
 
+            # Strip the microseconds if they are too long
+            if '.' in ts_str:
+                parts = ts_str.split('.')
+                if len(parts) > 1:
+                    ms_part = parts[1].split('+')[0].split('-')[0]
+                    if len(ms_part) > 6:
+                        ts_str = parts[0] + '.' + ms_part[:6]
+                        if '+' in parts[1]:
+                            ts_str += '+' + parts[1].split('+')[1]
+                        elif '-' in parts[1][1:]:
+                            ts_str += '-' + parts[1].split('-')[-1]
+
             timestamp = datetime.fromisoformat(ts_str)
             if timestamp.tzinfo is None:
                 timestamp = timestamp.replace(tzinfo=timezone.utc)
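Context for the microsecond trimming: before Python 3.11, `datetime.fromisoformat` rejects both the `Z` suffix and fractional seconds longer than six digits, so nanosecond-precision timestamps must be normalized first. A standalone sketch of the same steps:

```python
from datetime import datetime, timezone

def normalize_iso(ts_str: str) -> datetime:
    # Same steps as the parser above: map 'Z' to an explicit offset,
    # force the 'T' separator, and cap fractional seconds at 6 digits.
    ts_str = ts_str.replace('Z', '+00:00')
    if 'T' not in ts_str:
        ts_str = ts_str.replace(' ', 'T')
    if '.' in ts_str:
        head, tail = ts_str.split('.', 1)
        frac = tail.split('+')[0].split('-')[0]
        if len(frac) > 6:
            ts_str = head + '.' + frac[:6]
            if '+' in tail:
                ts_str += '+' + tail.split('+')[1]
            elif '-' in tail[1:]:
                ts_str += '-' + tail.split('-')[-1]
    dt = datetime.fromisoformat(ts_str)
    return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

print(normalize_iso('2024-01-05 12:34:56.123456789Z'))
# -> 2024-01-05 12:34:56.123456+00:00
```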
@@ -213,7 +282,7 @@ class GettexExchange(BaseExchange):
             )
 
         except Exception as e:
-            print(f"[GETTEX] Error parsing CSV row: {e}")
+            # Only log the first few errors
             return None
 
     def fetch_latest_trades(self, include_yesterday: bool = True, since_date: datetime = None) -> List[Trade]:
@@ -289,7 +358,6 @@ class GettexExchange(BaseExchange):
         trades = []
 
         try:
-            print(f"[{self.name}] Downloading: {url}")
             response = requests.get(url, headers=HEADERS, timeout=60)
 
             if response.status_code == 404:
@@ -301,16 +369,26 @@ class GettexExchange(BaseExchange):
             with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as f:
                 csv_text = f.read().decode('utf-8')
 
-            # Parse the CSV
-            reader = csv.DictReader(io.StringIO(csv_text), delimiter=';')
+            # Debug: show the first lines
+            lines = csv_text.strip().split('\n')
+            if len(lines) <= 1:
+                # File is empty or contains only the header
+                return []
+
+            # Parse the CSV - try different delimiters
+            delimiter = ';' if ';' in lines[0] else (',' if ',' in lines[0] else '\t')
+            reader = csv.DictReader(io.StringIO(csv_text), delimiter=delimiter)
 
+            row_count = 0
             for row in reader:
+                row_count += 1
                 try:
                     trade = self._parse_csv_row(row)
                     if trade:
                         trades.append(trade)
                 except Exception as e:
-                    print(f"[{self.name}] Error parsing row: {e}")
+                    if row_count <= 2:
+                        print(f"[{self.name}] Error parsing row: {e}, keys: {list(row.keys())[:5]}")
                     continue
 
             print(f"[{self.name}] Parsed {len(trades)} trades from {filename}")
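Putting the gettex pieces together, the download path these hunks touch boils down to gunzip, delimiter guess, then `DictReader`. A self-contained miniature with invented data:

```python
import csv
import gzip
import io

# Build an in-memory .csv.gz the way the venue would serve one (invented row).
raw = "ISIN;Pric;Qty;TrdDtTm\nDE0007164600;123,45;10;2024-01-05T12:34:56Z\n"
blob = gzip.compress(raw.encode('utf-8'))

# Same unpack path as the code above: gunzip, guess the delimiter, DictReader.
with gzip.GzipFile(fileobj=io.BytesIO(blob)) as f:
    csv_text = f.read().decode('utf-8')

lines = csv_text.strip().split('\n')
delimiter = ';' if ';' in lines[0] else (',' if ',' in lines[0] else '\t')
for row in csv.DictReader(io.StringIO(csv_text), delimiter=delimiter):
    print(row['ISIN'], row['Pric'].replace(',', '.'), row['Qty'])
# -> DE0007164600 123.45 10
```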