Fix: Store drops immediately after each TLD (crash-safe)
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled

- Drops are now stored in DB right after each TLD sync completes
- If the process crashes, drops from already-synced TLDs are preserved
- Temporarily exclude .org (too large, causes OOM)
- Show stored count in summary
This commit is contained in:
2025-12-17 09:39:21 +01:00
parent 6e9c5a1394
commit 90ec2648fc

View File

@ -44,7 +44,9 @@ LOG_FILE = Path("/home/user/logs/zone_sync.log")
COMPRESS_DOMAIN_LISTS = True COMPRESS_DOMAIN_LISTS = True
# CZDS TLDs we have access to # CZDS TLDs we have access to
CZDS_TLDS = ["app", "dev", "info", "online", "org", "xyz"] # Note: .org is HUGE (~10M domains, 442MB gz) - requires special handling
CZDS_TLDS = ["app", "dev", "info", "online", "xyz"] # org temporarily excluded due to memory
CZDS_TLDS_LARGE = ["org"] # Process separately with streaming
# Switch.ch AXFR config # Switch.ch AXFR config
SWITCH_CONFIG = { SWITCH_CONFIG = {
@ -521,16 +523,34 @@ async def main():
logger.info("\n📊 Initial storage check...") logger.info("\n📊 Initial storage check...")
initial_storage = log_storage_stats() initial_storage = log_storage_stats()
all_drops = []
results = [] results = []
total_drops_stored = 0
# Helper to store drops immediately after each TLD
async def store_tld_drops(drops: list, tld: str):
nonlocal total_drops_stored
if not drops:
return 0
try:
session = await get_db_session()
stored = await store_drops_in_db(drops, session)
await session.close()
total_drops_stored += stored
logger.info(f" 💾 Stored {stored} .{tld} drops in database")
return stored
except Exception as e:
logger.error(f" ❌ Failed to store .{tld} drops: {e}")
return 0
# Sync CZDS TLDs (sequentially to respect rate limits) # Sync CZDS TLDs (sequentially to respect rate limits)
logger.info("\n📦 Syncing ICANN CZDS zone files...") logger.info("\n📦 Syncing ICANN CZDS zone files...")
for tld in CZDS_TLDS: for tld in CZDS_TLDS:
result = await sync_czds_tld(tld) result = await sync_czds_tld(tld)
results.append(result) results.append(result)
if hasattr(result, 'drops'):
all_drops.extend(result.drops) # Store drops IMMEDIATELY after each TLD (crash-safe)
if hasattr(result, 'drops') and result.drops:
await store_tld_drops(result.drops, tld)
# Rate limit: wait between downloads # Rate limit: wait between downloads
if tld != CZDS_TLDS[-1]: if tld != CZDS_TLDS[-1]:
@ -542,19 +562,10 @@ async def main():
for tld in ["ch", "li"]: for tld in ["ch", "li"]:
result = await sync_switch_tld(tld) result = await sync_switch_tld(tld)
results.append(result) results.append(result)
if hasattr(result, 'drops'):
all_drops.extend(result.drops) # Store drops IMMEDIATELY
if hasattr(result, 'drops') and result.drops:
# Store drops in database await store_tld_drops(result.drops, tld)
if all_drops:
logger.info(f"\n💾 Storing {len(all_drops)} drops in database...")
try:
session = await get_db_session()
stored = await store_drops_in_db(all_drops, session)
await session.close()
logger.info(f"✅ Stored {stored} drops in database")
except Exception as e:
logger.error(f"❌ Failed to store drops: {e}")
# Cleanup stray files # Cleanup stray files
logger.info("\n🧹 Cleaning up temporary files...") logger.info("\n🧹 Cleaning up temporary files...")
@ -587,7 +598,7 @@ async def main():
logger.info("-" * 60) logger.info("-" * 60)
logger.info(f" TOTAL: {total_domains:,} domains across {success_count}/{len(results)} TLDs") logger.info(f" TOTAL: {total_domains:,} domains across {success_count}/{len(results)} TLDs")
logger.info(f" DROPS: {total_drops:,} new drops detected") logger.info(f" DROPS: {total_drops:,} detected, {total_drops_stored:,} stored in DB")
logger.info(f" TIME: {duration:.1f} seconds") logger.info(f" TIME: {duration:.1f} seconds")
# Final storage stats # Final storage stats