def xnat_etl(
    input_xnat_url,
    username,
    password,
    project_name,
    num_cores,
    debug_limit,
    output_dir,
):
    """Gather per-subject scan tables from an XNAT project into one parquet file.

    Connects to the XNAT server, schedules ``get_patient_scans`` for each
    selected subject of the project on a Dask cluster, concatenates the
    results with ``pd.concat`` and writes the combined table to
    ``<output_dir>/xnat_data_<project_name>.parquet``.

    Args:
        input_xnat_url: URL of the XNAT server handed to ``pyxnat.Interface``.
        username: XNAT user name.
        password: XNAT password.
        project_name: Identifier of the XNAT project to export; also used in
            the output file name and in the returned segment keys.
        num_cores: Core budget; the cluster gets ``ceil(num_cores / 8)``
            workers with 8 threads each.
        debug_limit: Integer cap on the number of subjects processed (the
            first ``debug_limit`` subjects in iteration order). A negative
            value disables the cap.
        output_dir: Directory passed to ``get_patient_scans`` and used for
            the parquet output.

    Returns:
        A dict with ``"feature_data"`` (path of the written parquet file) and
        ``"segment_keys"`` (``{"xnat_project_id": project_name}``).

    Raises:
        ValueError: If no subject is selected (empty project or
            ``debug_limit == 0``); there would be nothing to concatenate.
        KeyError: If the ``HOSTNAME`` environment variable is not set.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from itertools import islice

    xnat = pyxnat.Interface(input_xnat_url, user=username, password=password)
    p = xnat.select.project(project_name)
    logger.info(f"{p}")

    # Stop iterating as soon as the cap is reached instead of walking the
    # whole subject listing; ``None`` means "no cap" for islice.
    subject_cap = None if debug_limit < 0 else debug_limit
    subjects = list(islice(p.subjects(), subject_cap))

    # pd.concat([]) fails with an opaque "No objects to concatenate"; fail
    # early with a clear message, before a cluster is spun up for nothing.
    if not subjects:
        raise ValueError(
            f"No subjects selected for XNAT project {project_name!r} "
            f"(debug_limit={debug_limit}); nothing to export."
        )

    with Client(
        host=os.environ["HOSTNAME"],
        # Round up so that workers * 8 threads covers num_cores.
        n_workers=((num_cores + 7) // 8),
        threads_per_worker=8,
    ) as client:
        logger.info(f"Client: {client} ")
        logger.info(f"Dashboard: {client.dashboard_link}")

        # One lazy task per subject; presumably each yields a DataFrame,
        # since the results are fed to pd.concat -- confirm against
        # get_patient_scans.
        dfs = [delayed(get_patient_scans)(s, output_dir) for s in subjects]
        df_project = delayed(pd.concat)(dfs).compute()

    logger.info(df_project)

    output_file = os.path.join(output_dir, f"xnat_data_{project_name}.parquet")
    df_project.to_parquet(output_file)

    return {
        "feature_data": output_file,
        "segment_keys": {
            "xnat_project_id": project_name,
        },
    }