diff --git a/backend/scripts/sync_all_zones.py b/backend/scripts/sync_all_zones.py index 986b700..1f6280a 100644 --- a/backend/scripts/sync_all_zones.py +++ b/backend/scripts/sync_all_zones.py @@ -44,7 +44,9 @@ LOG_FILE = Path("/home/user/logs/zone_sync.log") COMPRESS_DOMAIN_LISTS = True # CZDS TLDs we have access to -CZDS_TLDS = ["app", "dev", "info", "online", "org", "xyz"] +# Note: .org is HUGE (~10M domains, 442MB gz) - requires special handling +CZDS_TLDS = ["app", "dev", "info", "online", "xyz"] # org temporarily excluded due to memory +CZDS_TLDS_LARGE = ["org"] # Process separately with streaming # Switch.ch AXFR config SWITCH_CONFIG = { @@ -521,16 +523,34 @@ async def main(): logger.info("\n📊 Initial storage check...") initial_storage = log_storage_stats() - all_drops = [] results = [] + total_drops_stored = 0 + + # Helper to store drops immediately after each TLD + async def store_tld_drops(drops: list, tld: str): + nonlocal total_drops_stored + if not drops: + return 0 + try: + session = await get_db_session() + stored = await store_drops_in_db(drops, session) + await session.close() + total_drops_stored += stored + logger.info(f" 💾 Stored {stored} .{tld} drops in database") + return stored + except Exception as e: + logger.error(f" ❌ Failed to store .{tld} drops: {e}") + return 0 # Sync CZDS TLDs (sequentially to respect rate limits) logger.info("\n📦 Syncing ICANN CZDS zone files...") for tld in CZDS_TLDS: result = await sync_czds_tld(tld) results.append(result) - if hasattr(result, 'drops'): - all_drops.extend(result.drops) + + # Store drops IMMEDIATELY after each TLD (crash-safe) + if hasattr(result, 'drops') and result.drops: + await store_tld_drops(result.drops, tld) # Rate limit: wait between downloads if tld != CZDS_TLDS[-1]: @@ -542,19 +562,10 @@ async def main(): for tld in ["ch", "li"]: result = await sync_switch_tld(tld) results.append(result) - if hasattr(result, 'drops'): - all_drops.extend(result.drops) - - # Store drops in 
database - if all_drops: - logger.info(f"\n💾 Storing {len(all_drops)} drops in database...") - try: - session = await get_db_session() - stored = await store_drops_in_db(all_drops, session) - await session.close() - logger.info(f"✅ Stored {stored} drops in database") - except Exception as e: - logger.error(f"❌ Failed to store drops: {e}") + + # Store drops IMMEDIATELY + if hasattr(result, 'drops') and result.drops: + await store_tld_drops(result.drops, tld) # Cleanup stray files logger.info("\n🧹 Cleaning up temporary files...") @@ -587,7 +598,7 @@ async def main(): logger.info("-" * 60) logger.info(f" TOTAL: {total_domains:,} domains across {success_count}/{len(results)} TLDs") - logger.info(f" DROPS: {total_drops:,} new drops detected") + logger.info(f" DROPS: {total_drops:,} detected, {total_drops_stored:,} stored in DB") logger.info(f" TIME: {duration:.1f} seconds") # Final storage stats