-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfinal_verification.py
More file actions
48 lines (38 loc) · 1.69 KB
/
Copy pathfinal_verification.py
File metadata and controls
48 lines (38 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
import sys
# Switch stdout to UTF-8: the report printed by this script contains Chinese
# text and an emoji.
sys.stdout.reconfigure(encoding='utf-8')

# Parse the subject metadata file.  The rest of the script iterates it with
# .items() and calls .get() on each value, i.e. it is used as a mapping of
# subject id -> per-subject record.
with open(
    'new_project/data/01_raw/clinical/subject_metadata.json',
    encoding='utf-8',
) as metadata_file:
    data = json.loads(metadata_file.read())
# Partition the subjects by their 'data_version' field.  Records whose
# version is missing, or is anything other than 'v1' / 'v2', end up in
# neither mapping.
v1_subjects = {}
v2_subjects = {}
for subject_id, record in data.items():
    version = record.get('data_version')
    if version == 'v1':
        v1_subjects[subject_id] = record
    elif version == 'v2':
        v2_subjects[subject_id] = record

print("=== 最终验证结果 ===\n")
# --- V1 statistics ----------------------------------------------------------
print(f"V1受试者总数: {len(v1_subjects)}")

# Bucket the V1 subject ids by their 'group' field; records that have no
# 'group' key are filed under 'unknown'.
v1_by_group = {}
for s_id, s in v1_subjects.items():
    v1_by_group.setdefault(s.get('group', 'unknown'), []).append(s_id)

# Report the three expected cohorts first, in a fixed order and even when
# empty, followed by any other group label that occurs in the data.  Listing
# the unexpected labels keeps the per-group counts adding up to the V1 total
# instead of silently dropping those subjects from the breakdown.
expected_groups = ['control', 'mci', 'ad']
extra_groups = sorted(
    (g for g in v1_by_group if g not in expected_groups), key=str
)
for group in expected_groups + extra_groups:
    subjects = sorted(v1_by_group.get(group, []))
    # str() guards the label formatting against a non-string group value
    # (e.g. a JSON null stored under 'group').
    print(f" {str(group).upper()}: {len(subjects)}个")
    # Count the subjects that have at least one task.  A truthiness test is
    # used because dict.get() only falls back to its default when the key is
    # absent: a record with "tasks_available": null would yield None, and
    # len(None) raises TypeError.  Null and empty lists both count as "none".
    with_tasks = sum(
        1 for s_id in subjects if v1_subjects[s_id].get('tasks_available')
    )
    print(f" 有任务: {with_tasks}/{len(subjects)}")
# --- V2 statistics ----------------------------------------------------------
print(f"\nV2受试者总数: {len(v2_subjects)}")
# Count V2 subjects that have at least one task.  Truthiness is used rather
# than len(...) > 0 so that a record with "tasks_available": null (for which
# dict.get() returns None, not the default) counts as "no tasks" instead of
# raising TypeError.
v2_with_tasks = sum(
    1 for s in v2_subjects.values() if s.get('tasks_available')
)
print(f" 有任务: {v2_with_tasks}/{len(v2_subjects)}")
# --- AD group: per-subject detail -------------------------------------------
print("\n=== AD组映射关系 ===")
# Ids of all V1 subjects whose group is 'ad', in sorted order.
ad_v1 = sorted(
    s_id for s_id, s in v1_subjects.items() if s.get('group') == 'ad'
)
for s_id in ad_v1[:5]:  # show only the first five
    s = v1_subjects[s_id]
    source = s.get('source_path', 'N/A')
    # `or []` treats a JSON null task list as empty; dict.get() would return
    # None for a present-but-null key and len(None) raises TypeError.
    tasks = len(s.get('tasks_available') or [])
    print(f"{s_id}: {tasks} tasks (source: {source})")
# Total number of AD subjects, including those not listed above.
print(f"... 共{len(ad_v1)}个AD受试者")
# Closing banner (printed unconditionally), then the overall head-count with
# the V1 / V2 split.
print("\n✅ 数据修复完成!")
total_count = len(data)
v1_count = len(v1_subjects)
v2_count = len(v2_subjects)
print(f"总计: {total_count} 个受试者 (V1: {v1_count}, V2: {v2_count})")