Skip to content

xnat_etl

Source code in src/luna/radiology/cli/xnat_etl.py
def xnat_etl(
    input_xnat_url,
    username,
    password,
    project_name,
    num_cores,
    debug_limit,
    output_dir,
):
    """Collect per-subject scan tables from an XNAT project into one parquet file.

    Opens a pyxnat interface to ``input_xnat_url``, selects ``project_name``,
    schedules one delayed ``get_patient_scans(subject, output_dir)`` call per
    selected subject, concatenates the results with ``pd.concat`` and writes
    the combined frame to ``<output_dir>/xnat_data_<project_name>.parquet``.

    Args:
        input_xnat_url: URL of the XNAT server handed to ``pyxnat.Interface``.
        username: XNAT user name.
        password: XNAT password.
        project_name: Identifier of the XNAT project to export.
        num_cores: Core budget; the client is started with
            ``ceil(num_cores / 8)`` workers of 8 threads each.
        debug_limit: Maximum number of subjects to process. A negative value
            disables the limit.
        output_dir: Directory passed on to ``get_patient_scans`` and used for
            the resulting parquet file.

    Returns:
        dict: ``{"feature_data": <parquet path>,
        "segment_keys": {"xnat_project_id": project_name}}``.

    Raises:
        ValueError: If no subject is selected (empty project or
            ``debug_limit == 0``), since there is nothing to concatenate.
    """
    # Function-scope imports: only this function needs them.
    import socket
    from itertools import islice

    xnat = pyxnat.Interface(input_xnat_url, user=username, password=password)

    project = xnat.select.project(project_name)

    logger.info(f"{project}")

    # Stop iterating as soon as the debug limit is reached instead of walking
    # (and discarding) every remaining subject of the project.
    subjects = project.subjects()
    if debug_limit >= 0:
        subjects = islice(subjects, debug_limit)

    # Building the task graph is lazy, so it can be done before any cluster
    # exists; this lets an empty selection fail fast with a clear message.
    tasks = [delayed(get_patient_scans)(subject, output_dir) for subject in subjects]
    if not tasks:
        raise ValueError(
            f"No subjects selected from XNAT project {project_name!r} "
            f"(debug_limit={debug_limit}); nothing to export."
        )

    with Client(
        # HOSTNAME is not exported by every shell; fall back to the OS hostname.
        host=os.environ.get("HOSTNAME") or socket.gethostname(),
        n_workers=((num_cores + 7) // 8),  # ceil(num_cores / 8)
        threads_per_worker=8,
    ) as client:
        logger.info(f"Client: {client} ")
        logger.info(f"Dashboard: {client.dashboard_link}")

        df_project = delayed(pd.concat)(tasks).compute()

        logger.info(df_project)

    output_file = os.path.join(output_dir, f"xnat_data_{project_name}.parquet")
    df_project.to_parquet(output_file)

    return {
        "feature_data": output_file,
        "segment_keys": {
            "xnat_project_id": project_name,
        },
    }