Compare commits

...

4 Commits

4 changed files with 759 additions and 0 deletions

View File

@@ -0,0 +1,126 @@
# Generated by Django 4.1.13 on 2025-12-16 09:14
from django.db import migrations, models, transaction
import django.db.models.deletion
def log(msg=''):
    """Print *msg* to stdout behind a `` -> `` marker."""
    print(' -> {}'.format(msg))
def ensure_asset_single_node(apps, schema_editor):
    """Abort the migration if any asset is linked to more than one node.

    Groups the rows of the ``Asset.nodes`` through table by asset and counts
    the groups holding more than one node.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Supplied by ``RunPython``; not used here.

    Raises:
        Exception: If at least one asset has more than one node.
    """
    print('')
    log('Checking that all assets are linked to only one node...')
    through_model = apps.get_model('assets', 'Asset').nodes.through
    nodes_per_asset = through_model.objects.values('asset_id').annotate(
        node_count=models.Count('node_id')
    )
    offending = nodes_per_asset.filter(node_count__gt=1).count()
    if offending > 0:
        raise Exception(
            f'There are {offending} assets associated with more than one node. '
            'Please ensure each asset is linked to only one node before applying this migration.'
        )
    log('All assets are linked to only one node. Proceeding with the migration.')
def ensure_asset_has_node(apps, schema_editor):
    """Abort the migration if any asset is not linked to a node.

    Assets without a link are counted directly. Subtracting the number of
    through-table rows from the number of assets is not reliable on its own:
    an asset with two links would cancel out an asset with none.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Supplied by ``RunPython``; not used here.

    Raises:
        Exception: If at least one asset has no node.
    """
    log('Checking that all assets are linked to at least one node...')
    Asset = apps.get_model('assets', 'Asset')
    assets_count_without_node = Asset.objects.filter(nodes__isnull=True).count()
    if assets_count_without_node > 0:
        raise Exception(
            f'Some assets ({assets_count_without_node}) are not associated with any node. '
            'Please ensure all assets are linked to a node before applying this migration.'
        )
    log('All assets are linked to a node. Proceeding with the migration.')
def migrate_asset_node_id_field(apps, schema_editor):
    """Populate ``Asset.node_id`` from the ``Asset.nodes`` through table.

    Every asset whose ``node_id`` is still NULL receives the node it is
    linked to in the through table. All updates run inside one
    ``transaction.atomic()`` block and are sent in batches.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Supplied by ``RunPython``; not used here.

    Raises:
        Exception: If an asset has no through-table row, or if any asset
            still has a NULL ``node_id`` after the update.
    """
    log('Migrating node_id field for all assets...')
    Asset = apps.get_model('assets', 'Asset')
    Through = Asset.nodes.through

    # Materialize once: the instances mutated below must be the very same
    # objects that are written back, so do not rely on queryset caching.
    assets = list(Asset.objects.filter(node_id__isnull=True))
    total = len(assets)
    log(f'Found {total} assets to migrate.')

    asset_node_mapper = {
        str(asset_id): str(node_id)
        for asset_id, node_id in Through.objects.values_list('asset_id', 'node_id')
    }

    # Resolve every asset before touching the database so a missing link
    # fails fast, before any row is written.
    for asset in assets:
        node_id = asset_node_mapper.get(str(asset.id))
        if not node_id:
            raise Exception(
                f'Asset (ID: {asset.id}) is not associated with any node. '
                'Cannot migrate node_id field.'
            )
        asset.node_id = node_id

    batch_size = 5000
    with transaction.atomic():
        for offset in range(0, total, batch_size):
            batch = assets[offset:offset + batch_size]
            # One bulk statement per batch instead of one UPDATE per asset.
            Asset.objects.bulk_update(batch, ['node'])
            start = offset + 1
            end = offset + len(batch)
            log(f"Migrated {start}-{end}/{total} assets")

    count = Asset.objects.filter(node_id__isnull=True).count()
    if count > 0:
        log('Warning: Some assets still have null node_id after migration.')
        raise Exception('Migration failed: Some assets have null node_id.')
    count = Asset.objects.filter(node_id__isnull=False).count()
    log(f'Successfully migrated node_id for {count} assets.')
class Migration(migrations.Migration):
    """Add a required ``Asset.node`` foreign key filled from ``Asset.nodes``.

    Steps: two pre-flight data checks, add the column as nullable, copy the
    node of every asset into it, then make the column non-nullable.

    NOTE(review): Django's documentation warns that combining ``RunPython``
    with schema changes in one migration can fail on PostgreSQL ("pending
    trigger events") -- confirm on the databases this project supports.
    """

    dependencies = [('assets', '0019_alter_asset_connectivity')]

    operations = [
        # Pre-flight checks: each raises if the data cannot be migrated.
        migrations.RunPython(
            ensure_asset_single_node,
            reverse_code=migrations.RunPython.noop,
        ),
        migrations.RunPython(
            ensure_asset_has_node,
            reverse_code=migrations.RunPython.noop,
        ),
        # Nullable first, so existing rows stay valid until they are filled.
        migrations.AddField(
            model_name='asset',
            name='node',
            field=models.ForeignKey(
                null=True,
                on_delete=models.PROTECT,
                related_name='direct_assets',
                to='assets.node',
                verbose_name='Node',
            ),
        ),
        migrations.RunPython(
            migrate_asset_node_id_field,
            reverse_code=migrations.RunPython.noop,
        ),
        # Every row now has a node: drop ``null=True``.
        migrations.AlterField(
            model_name='asset',
            name='node',
            field=models.ForeignKey(
                on_delete=models.PROTECT,
                related_name='direct_assets',
                to='assets.node',
                verbose_name='Node',
            ),
        ),
    ]

View File

@@ -172,6 +172,11 @@ class Asset(NodesRelationMixin, LabeledMixin, AbsConnectivity, JSONFilterMixin,
"assets.Zone", null=True, blank=True, related_name='assets', "assets.Zone", null=True, blank=True, related_name='assets',
verbose_name=_("Zone"), on_delete=models.SET_NULL verbose_name=_("Zone"), on_delete=models.SET_NULL
) )
node = models.ForeignKey(
'assets.Node', null=False, blank=False, on_delete=models.PROTECT,
related_name='direct_assets', verbose_name=_("Node")
)
# TODO: once every usage in the codebase has been removed, delete the `nodes` field and change the related_name of the `node` field to 'assets'
nodes = models.ManyToManyField( nodes = models.ManyToManyField(
'assets.Node', default=default_node, related_name='assets', verbose_name=_("Nodes") 'assets.Node', default=default_node, related_name='assets', verbose_name=_("Nodes")
) )

View File

@@ -0,0 +1,358 @@
import os
import sys
import django
import random
from datetime import datetime
# Put the project's ``apps`` directory on sys.path; which relative location
# exists depends on the directory the script is started from.
if os.path.exists('../../apps'):
    sys.path.insert(0, '../../apps')
# Of the two nearer locations only the first one that exists is added.
for _candidate in ('../apps', './apps'):
    if os.path.exists(_candidate):
        sys.path.insert(0, _candidate)
        break
# Django must be configured before any project model is imported below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jumpserver.settings")
django.setup()
from assets.models import Asset, Node
from orgs.models import Organization
from django.db.models import Count
# Report file, resolved relative to the current working directory.
OUTPUT_FILE = 'report_cleanup_and_keep_one_node_for_multi_parent_nodes_assets.txt'

# Special organization IDs (as strings) and the names shown for them.
SPECIAL_ORGS = {
    '00000000-0000-0000-0000-000000000000': 'GLOBAL',
    '00000000-0000-0000-0000-000000000002': 'DEFAULT',
    '00000000-0000-0000-0000-000000000004': 'SYSTEM',
}

# Through model behind the Asset.nodes many-to-many relation.
try:
    AssetNodeThrough = Asset.nodes.through
except Exception:
    print("Failed to get AssetNodeThrough model. Check Asset.nodes field definition.")
    raise
def log(msg=''):
    """Print *msg* to the console, prefixed with the current ``[HH:MM:SS]`` time."""
    timestamp = datetime.now().strftime('%H:%M:%S')
    print('[{}] {}'.format(timestamp, msg))
def write_report(content):
    """Append *content* to the file named by ``OUTPUT_FILE``, encoded as UTF-8."""
    with open(OUTPUT_FILE, mode='a', encoding='utf-8') as report:
        report.write(content)
def get_org_name(org_id, orgs_map):
    """Return a display name for *org_id*.

    ``SPECIAL_ORGS`` is consulted first, keyed by the string form of the ID.
    Otherwise the name is taken from the object stored under *org_id* in
    *orgs_map*. ``'Unknown'`` is returned when neither lookup succeeds.
    """
    try:
        return SPECIAL_ORGS[str(org_id)]
    except KeyError:
        pass
    org = orgs_map.get(org_id)
    return org.name if org else 'Unknown'
def find_and_cleanup_multi_parent_assets():
    """Build the cleanup plan for assets linked to more than one node.

    Nothing is deleted here. For every affected asset one through record is
    picked at random to keep and the remaining ones are listed for removal.

    Returns:
        dict: ``{org_id: {asset_id: info}}`` where ``info`` has the keys
        ``asset_name``, ``keep_node_id``, ``keep_node``, ``remove_records``
        and ``remove_nodes``. Empty when no asset has multiple nodes.
    """
    log("Searching for assets with multiple parent nodes...")
    # One row per asset that is linked to more than one distinct node.
    multi_parent_assets = AssetNodeThrough.objects.values('asset_id').annotate(
        node_count=Count('node_id', distinct=True)
    ).filter(node_count__gt=1).order_by('-node_count')
    total_count = multi_parent_assets.count()
    log(f"Found {total_count:,} assets with multiple parent nodes\n")
    if total_count == 0:
        log("✓ All assets already have single parent node")
        return {}

    asset_ids = [item['asset_id'] for item in multi_parent_assets]
    all_through_records = list(AssetNodeThrough.objects.filter(asset_id__in=asset_ids))
    node_ids = list({through.node_id for through in all_through_records})

    # Load everything in bulk so the loop below performs no queries.
    log("Batch loading Asset objects...")
    assets_map = {asset.id: asset for asset in Asset.objects.filter(id__in=asset_ids)}
    log("Batch loading Node objects...")
    nodes_map = {node.id: node for node in Node.objects.filter(id__in=node_ids)}

    # asset_id -> list of its through records
    asset_nodes_map = {}
    for through in all_through_records:
        asset_nodes_map.setdefault(through.asset_id, []).append(through)

    org_cleanup_data = {}  # org_id -> { asset_id -> cleanup info }
    for asset_id in asset_ids:
        asset = assets_map.get(asset_id)
        if not asset:
            log(f"⚠ Asset {asset_id} not found in map, skipping")
            continue
        through_records = asset_nodes_map.get(asset_id, [])
        if len(through_records) < 2:
            continue
        # Randomly select one node to keep
        keep_through = random.choice(through_records)
        remove_throughs = [t for t in through_records if t.id != keep_through.id]
        # The organization entry is created only now, so an organization
        # whose assets were all skipped never appears in the plan.
        org_cleanup_data.setdefault(asset.org_id, {})[asset_id] = {
            'asset_name': asset.name,
            'keep_node_id': keep_through.node_id,
            'keep_node': nodes_map.get(keep_through.node_id),
            'remove_records': remove_throughs,
            'remove_nodes': [nodes_map.get(t.node_id) for t in remove_throughs],
        }
    return org_cleanup_data
def perform_cleanup(org_cleanup_data, dry_run=False):
    """Delete the through records listed under ``remove_records``.

    Args:
        org_cleanup_data: ``{org_id: {asset_id: info}}`` plan; each ``info``
            carries the records to delete under ``'remove_records'``.
        dry_run: When true, nothing is deleted and the records are only counted.

    Returns:
        int: Number of records deleted, or that would be deleted in a dry run.
    """
    log(
        "DRY RUN: Simulating cleanup process (no data will be deleted)..."
        if dry_run
        else "\nStarting cleanup process..."
    )
    total_deleted = 0
    for assets_plan in org_cleanup_data.values():
        for cleanup_info in assets_plan.values():
            for through_record in cleanup_info['remove_records']:
                if not dry_run:
                    through_record.delete()
                # Counted in both modes so a dry run reports what would go.
                total_deleted += 1
    return total_deleted
def verify_cleanup():
    """Check that no asset is linked to more than one node any more.

    On failure, up to ten offending assets are logged.

    Returns:
        bool: True when no asset with multiple nodes remains, else False.
    """
    log("\n" + "="*80)
    log("VERIFICATION: Checking for remaining assets with multiple parent nodes...")
    log("="*80)
    # One row per asset that is still linked to more than one distinct node.
    multi_parent_assets = AssetNodeThrough.objects.values('asset_id').annotate(
        node_count=Count('node_id', distinct=True)
    ).filter(node_count__gt=1).order_by('-node_count')
    remaining_count = multi_parent_assets.count()
    if remaining_count == 0:
        log("✓ Verification successful: No assets with multiple parent nodes remaining\n")
        return True

    log(f"✗ Verification failed: Found {remaining_count:,} assets still with multiple parent nodes\n")
    # Show some details
    for item in multi_parent_assets[:10]:
        asset_id = item['asset_id']
        node_count = item['node_count']
        try:
            asset = Asset.objects.get(id=asset_id)
        except Asset.DoesNotExist:
            # Only a missing asset is tolerated; any other error propagates.
            log(f" - Asset ID: {asset_id} has {node_count} parent nodes")
        else:
            log(f" - Asset: {asset.name} ({asset_id}) has {node_count} parent nodes")
    if remaining_count > 10:
        log(f" ... and {remaining_count - 10} more")
    return False
def generate_report(org_cleanup_data, total_deleted):
    """Write the cleanup report to ``OUTPUT_FILE``, replacing any previous one.

    The report is assembled in memory and written with a single call, so the
    file is not reopened for every line. A previous report is removed only
    once the new content is complete.

    Args:
        org_cleanup_data: ``{org_id: {asset_id: info}}`` cleanup plan.
        total_deleted: Number of relationships deleted (or that would be).
    """
    parts = []
    emit = parts.append
    heavy_rule = '=' * 80
    light_rule = '-' * 80

    # Header
    emit("Multi-Parent Assets Cleanup Report\n")
    emit(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    emit(f"{heavy_rule}\n\n")

    # Organizations referenced by the plan, for name lookup
    all_orgs = {
        org.id: org
        for org in Organization.objects.filter(id__in=list(org_cleanup_data))
    }
    sorted_org_ids = sorted(org_cleanup_data.keys())

    # Statistics
    total_orgs = Organization.objects.count()
    orgs_processed = len(org_cleanup_data)
    orgs_no_issues = total_orgs - orgs_processed
    total_assets_cleaned = sum(len(assets) for assets in org_cleanup_data.values())
    total_relationships = AssetNodeThrough.objects.count()

    # Overview
    emit("OVERVIEW\n")
    emit(f"{light_rule}\n")
    emit(f"Total organizations: {total_orgs:,}\n")
    emit(f"Organizations processed: {orgs_processed:,}\n")
    emit(f"Organizations without issues: {orgs_no_issues:,}\n")
    emit(f"Total assets cleaned: {total_assets_cleaned:,}\n")
    emit(f"Total relationships (through records): {total_relationships:,}\n")
    emit(f"Total relationships deleted: {total_deleted:,}\n\n")

    # Summary by organization
    emit("Summary by Organization:\n")
    for org_id in sorted_org_ids:
        org_name = get_org_name(org_id, all_orgs)
        asset_count = len(org_cleanup_data[org_id])
        emit(f" - {org_name} ({org_id}): {asset_count:,} assets cleaned\n")
    emit(f"\n{heavy_rule}\n\n")

    # Detailed cleanup information grouped by organization
    for org_id in sorted_org_ids:
        org_name = get_org_name(org_id, all_orgs)
        asset_count = len(org_cleanup_data[org_id])
        emit(f"ORGANIZATION: {org_name} ({org_id})\n")
        emit(f"Total assets cleaned: {asset_count:,}\n")
        emit(f"{light_rule}\n\n")
        for asset_id, cleanup_info in org_cleanup_data[org_id].items():
            emit(f"Asset: {cleanup_info['asset_name']} ({asset_id})\n")
            # Kept node
            keep_node = cleanup_info['keep_node']
            if keep_node:
                emit(f" ✓ Kept: {keep_node.name} (key: {keep_node.key}) (id: {keep_node.id})\n")
            else:
                emit(f" ✓ Kept: Unknown (id: {cleanup_info['keep_node_id']})\n")
            # Removed nodes
            emit(f" ✗ Removed: {len(cleanup_info['remove_nodes'])} node(s)\n")
            for node in cleanup_info['remove_nodes']:
                if node:
                    emit(f" - {node.name} (key: {node.key}) (id: {node.id})\n")
                else:
                    emit(" - Unknown\n")
            emit("\n")
        emit(f"{heavy_rule}\n\n")

    # Replace the previous report in one step.
    if os.path.exists(OUTPUT_FILE):
        os.remove(OUTPUT_FILE)
    write_report(''.join(parts))
    log(f"✓ Report written to {OUTPUT_FILE}")
def main():
    """Interactive entry point of the cleanup script.

    Shows a warning, asks for confirmation, builds the cleanup plan, writes a
    dry-run report, asks for three further confirmations, deletes the extra
    relationships, rewrites the report and verifies the result.

    Exit codes: 0 on success, when nothing has to be done, or when the user
    declines the initial prompt; 1 when a final confirmation is declined;
    2 on an unexpected error or when verification still finds assets with
    multiple parent nodes.
    """
    try:
        # Display warning banner
        warning_message = """
╔══════════════════════════════════════════════════════════════════════════════╗
║ ⚠️ WARNING ⚠️ ║
║ ║
║ This script is designed for TEST/FAKE DATA ONLY! ║
║ DO NOT run this script in PRODUCTION environment! ║
║ ║
║ This script will DELETE asset-node relationships from the database. ║
║ Use only for data cleanup in development/testing environments. ║
║ ║
╚══════════════════════════════════════════════════════════════════════════════╝
"""
        print(warning_message)
        # Ask user to confirm before proceeding
        confirm = input("Do you understand the warning and want to continue? (yes/no): ").strip().lower()
        if confirm not in ['yes', 'y']:
            log("✗ Operation cancelled by user")
            sys.exit(0)
        log("✓ Proceeding with operation\n")

        org_cleanup_data = find_and_cleanup_multi_parent_assets()
        if not org_cleanup_data:
            log("✓ Cleanup complete, no assets to process")
            sys.exit(0)
        total_assets = sum(len(assets) for assets in org_cleanup_data.values())
        log(f"\nProcessing {total_assets:,} assets across {len(org_cleanup_data):,} organizations...")

        # First, do a dry-run to show what will be deleted
        log("\n" + "="*80)
        log("PREVIEW: Simulating cleanup process...")
        log("="*80)
        total_deleted_preview = perform_cleanup(org_cleanup_data, dry_run=True)
        log(f"✓ Dry-run complete: {total_deleted_preview:,} relationships would be deleted\n")
        # Generate preview report
        generate_report(org_cleanup_data, total_deleted_preview)
        log(f"✓ Preview report written to {OUTPUT_FILE}\n")

        # Ask for confirmation 3 times before actual deletion
        log("="*80)
        log("FINAL CONFIRMATION: Do you want to proceed with actual cleanup?")
        log("="*80)
        confirmation_count = 3
        for attempt in range(1, confirmation_count + 1):
            response = input(f"Confirm cleanup (attempt {attempt}/{confirmation_count})? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                log(f"✗ Cleanup cancelled by user at attempt {attempt}")
                sys.exit(1)
        log("✓ All confirmations received, proceeding with actual cleanup")

        # Perform cleanup
        total_deleted = perform_cleanup(org_cleanup_data)
        log(f"✓ Deleted {total_deleted:,} relationships")
        # Generate final report
        generate_report(org_cleanup_data, total_deleted)

        # A failed verification must not be reported as success.
        if not verify_cleanup():
            log("✗ Cleanup finished but verification failed: assets with multiple parent nodes remain")
            sys.exit(2)
        log(f"✓ Cleanup complete: processed {total_assets:,} assets")
        sys.exit(0)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exit calls above pass through this handler untouched.
        log(f"✗ Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,270 @@
import os
import sys
import django
from datetime import datetime
# Put the project's ``apps`` directory on sys.path; which relative location
# exists depends on the directory the script is started from.
if os.path.exists('../../apps'):
    sys.path.insert(0, '../../apps')
# Of the two nearer locations only the first one that exists is added.
for _candidate in ('../apps', './apps'):
    if os.path.exists(_candidate):
        sys.path.insert(0, _candidate)
        break
# Django must be configured before any project model is imported below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jumpserver.settings")
django.setup()
from assets.models import Asset, Node
from orgs.models import Organization
from django.db.models import Count
# Report file, resolved relative to the current working directory.
OUTPUT_FILE = 'report_find_multi_parent_nodes_assets.txt'

# Special organization IDs (as strings) and the names shown for them.
SPECIAL_ORGS = {
    '00000000-0000-0000-0000-000000000000': 'GLOBAL',
    '00000000-0000-0000-0000-000000000002': 'DEFAULT',
    '00000000-0000-0000-0000-000000000004': 'SYSTEM',
}

# Through model behind the Asset.nodes many-to-many relation.
try:
    AssetNodeThrough = Asset.nodes.through
except Exception:
    print("Failed to get AssetNodeThrough model. Check Asset.nodes field definition.")
    raise
def log(msg=''):
    """Print *msg* prefixed with the current ``[HH:MM:SS]`` time."""
    timestamp = datetime.now().strftime('%H:%M:%S')
    print('[{}] {}'.format(timestamp, msg))
def get_org_name(org_id, orgs_map):
    """Return a display name for *org_id*.

    ``SPECIAL_ORGS`` is consulted first, keyed by the string form of the ID.
    Otherwise the name is taken from the object stored under *org_id* in
    *orgs_map*. ``'Unknown'`` is returned when neither lookup succeeds.
    """
    try:
        return SPECIAL_ORGS[str(org_id)]
    except KeyError:
        pass
    org = orgs_map.get(org_id)
    return org.name if org else 'Unknown'
def write_report(content):
    """Append *content* to the file named by ``OUTPUT_FILE``, encoded as UTF-8."""
    with open(OUTPUT_FILE, mode='a', encoding='utf-8') as report:
        report.write(content)
def find_assets_multiple_parents():
    """Collect the assets that are linked to more than one node.

    Returns:
        dict: ``{org_id: {node_count: [asset_data, ...]}}`` where each
        ``asset_data`` has the keys ``asset_id``, ``asset_name`` and
        ``nodes`` (a list of dicts with ``id``, ``name``, ``key``, ``path``).
        Empty when every asset belongs to a single node.
    """
    log("Searching for assets with multiple parent nodes...")
    # One row per asset that is linked to more than one distinct node.
    multi_parent_assets = AssetNodeThrough.objects.values('asset_id').annotate(
        node_count=Count('node_id', distinct=True)
    ).filter(node_count__gt=1).order_by('-node_count')
    total_count = multi_parent_assets.count()
    log(f"Found {total_count:,} assets with multiple parent nodes\n")
    if total_count == 0:
        log("✓ All assets belong to only one node")
        return {}

    rows = list(multi_parent_assets)
    asset_ids = [item['asset_id'] for item in rows]
    all_through_records = list(AssetNodeThrough.objects.filter(asset_id__in=asset_ids))
    node_ids = list({through.node_id for through in all_through_records})

    # Load everything in bulk so the loop below performs no queries.
    log("Batch loading Asset objects...")
    assets_map = {asset.id: asset for asset in Asset.objects.filter(id__in=asset_ids)}
    log("Batch loading Node objects...")
    nodes_map = {node.id: node for node in Node.objects.filter(id__in=node_ids)}

    # asset_id -> list of its through records
    asset_nodes_map = {}
    for through in all_through_records:
        asset_nodes_map.setdefault(through.asset_id, []).append(through)

    # Organize by organization first, then by node count, then by asset
    org_assets_data = {}  # org_id -> { node_count -> [asset_data] }
    for item in rows:
        asset_id = item['asset_id']
        node_count = item['node_count']
        asset = assets_map.get(asset_id)
        if not asset:
            log(f"⚠ Asset {asset_id} not found in map, skipping")
            continue

        node_details = []
        for through in asset_nodes_map.get(asset_id, []):
            node = nodes_map.get(through.node_id)
            if not node:
                log(f"⚠ Node {through.node_id} not found in map, skipping")
                continue
            node_details.append({
                'id': node.id,
                'name': node.name,
                'key': node.key,
                'path': getattr(node, 'full_value', ''),
            })
        if not node_details:
            continue

        # The organization entry is created only now, so an organization
        # whose assets were all skipped never appears in the result.
        by_count = org_assets_data.setdefault(asset.org_id, {})
        by_count.setdefault(node_count, []).append({
            'asset_id': asset.id,
            'asset_name': asset.name,
            'nodes': node_details,
        })
    return org_assets_data
def generate_report(org_assets_data):
    """Write the multi-parent assets report to ``OUTPUT_FILE``.

    The report is assembled in memory and written with a single call, so the
    file is not reopened for every line. A previous report is removed only
    once the new content is complete.

    Args:
        org_assets_data: ``{org_id: {node_count: [asset_data, ...]}}``.
    """
    parts = []
    emit = parts.append
    heavy_rule = '=' * 80
    light_rule = '-' * 80

    # Header
    emit("Multi-Parent Assets Report\n")
    emit(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    emit(f"{heavy_rule}\n\n")

    # Organizations referenced by the data, for name lookup
    all_orgs = {
        org.id: org
        for org in Organization.objects.filter(id__in=list(org_assets_data))
    }
    sorted_org_ids = sorted(org_assets_data.keys())
    # Number of affected assets per organization, reused by both sections.
    org_asset_counts = {
        org_id: sum(len(assets) for assets in by_count.values())
        for org_id, by_count in org_assets_data.items()
    }

    # Statistics
    total_orgs = Organization.objects.count()
    orgs_with_issues = len(org_assets_data)
    orgs_without_issues = total_orgs - orgs_with_issues
    total_assets_with_issues = sum(org_asset_counts.values())

    # Overview
    emit("OVERVIEW\n")
    emit(f"{light_rule}\n")
    emit(f"Total organizations: {total_orgs:,}\n")
    emit(f"Organizations with multiple-parent assets: {orgs_with_issues:,}\n")
    emit(f"Organizations without issues: {orgs_without_issues:,}\n")
    emit(f"Total assets with multiple parent nodes: {total_assets_with_issues:,}\n\n")

    # Summary by organization
    emit("Summary by Organization:\n")
    for org_id in sorted_org_ids:
        org_name = get_org_name(org_id, all_orgs)
        emit(f" - {org_name} ({org_id}): {org_asset_counts[org_id]:,} assets\n")
    emit(f"\n{heavy_rule}\n\n")

    # Detailed sections grouped by organization, then node count
    for org_id in sorted_org_ids:
        org_name = get_org_name(org_id, all_orgs)
        emit(f"ORGANIZATION: {org_name} ({org_id})\n")
        emit(f"Total assets with issues: {org_asset_counts[org_id]:,}\n")
        emit(f"{light_rule}\n\n")
        # Highest node count first within the organization
        for node_count in sorted(org_assets_data[org_id].keys(), reverse=True):
            assets = org_assets_data[org_id][node_count]
            emit(f" Section: {node_count} Parent Nodes ({len(assets):,} assets)\n")
            emit(f" {'-'*76}\n\n")
            for asset in assets:
                emit(f" {asset['asset_name']} ({asset['asset_id']})\n")
                for node in asset['nodes']:
                    emit(f" {node['name']} ({node['key']}) ({node['path']}) ({node['id']})\n")
                emit("\n")
            emit("\n")
        emit(f"{heavy_rule}\n\n")

    # Replace the previous report in one step.
    if os.path.exists(OUTPUT_FILE):
        os.remove(OUTPUT_FILE)
    write_report(''.join(parts))
    log(f"✓ Report written to {OUTPUT_FILE}")
def main():
    """Entry point of the detection script.

    Exit codes: 0 when no asset has multiple parent nodes, 1 when such assets
    were found (a report is written first), 2 on an unexpected error.
    """
    try:
        org_assets_data = find_assets_multiple_parents()
        if not org_assets_data:
            log("✓ Detection complete, no issues found")
            sys.exit(0)

        total_assets = sum(
            len(assets)
            for by_count in org_assets_data.values()
            for assets in by_count.values()
        )
        org_total = len(org_assets_data)
        log(f"Generating report for {total_assets:,} assets across {org_total:,} organizations...")
        generate_report(org_assets_data)
        log(f"✗ Detected {total_assets:,} assets with multiple parent nodes")
        sys.exit(1)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exit calls above pass through this handler untouched.
        log(f"✗ Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    main()