# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2025 MINERVA European Support Centre contributors.
# See https://www.apache.org/licenses/LICENSE-2.0 for the full license text.

from nbiatoolkit import NBIAClient
import os

# Path of the TCIA manifest (.tcia) whose series list is read by parse_manifest().
MANIFEST_FILE = "./justinkirby/the-cancer-imaging-archive-lidcidri/versions/1/TCIA_LIDC-IDRI_20200921.tcia"
# Root directory under which main() creates one folder per patient/study/series.
DOWNLOAD_DIR = "./data"
# Passed as ``nParallel`` to NBIAClient.downloadSeries; presumably the number of
# concurrent download workers per series -- confirm against the nbiatoolkit docs.
N_PARALLEL = 4


def parse_manifest(path):
    """Return the series entries listed in a TCIA manifest file.

    Every line is stripped of surrounding whitespace. All non-blank lines
    that come after the first ``ListOfSeriesToDownload=`` line are returned
    in file order; any later repetition of that marker line is left out.
    Lines before the marker are ignored, and a file without the marker
    yields an empty list.
    """
    marker = "ListOfSeriesToDownload="

    with open(path, "r") as manifest:
        entries = [raw.strip() for raw in manifest.readlines()]

    if marker not in entries:
        return []

    first_marker = entries.index(marker)
    return [
        entry
        for entry in entries[first_marker + 1:]
        if entry and entry != marker
    ]


# Translation table mapping every character that is unsafe in a single path
# component to "_": the POSIX separator "/" and NUL, the Windows separator and
# reserved characters \ : * ? " < > |, and "." as well.
_UNSAFE_PATH_CHARS = str.maketrans({ch: "_" for ch in './\\:*?"<>|\0'})


def sanitize(text):
    """Make strings filesystem-safe.

    ``text`` is converted with ``str()`` and each of the characters
    ``. / \\ : * ? " < > |`` and NUL is replaced by an underscore, so the
    result can be used as one directory name on both POSIX and Windows.
    Strings that contain none of these characters are returned unchanged.
    """
    # One C-level pass instead of a chain of .replace() calls.
    return str(text).translate(_UNSAFE_PATH_CHARS)


def main():
    """Download every series named in the manifest into its own folder.

    Creates DOWNLOAD_DIR, reads the series list from MANIFEST_FILE, and for
    each entry asks the NBIA client for its series metadata. When metadata is
    returned, the first record's ``PatientID`` and ``StudyInstanceUID`` (with
    placeholder fallbacks) determine the target folder
    ``DOWNLOAD_DIR/<patient>/<study>/<series>``, into which the series is
    downloaded. Entries without metadata are skipped with a warning, and any
    exception raised while handling one entry is printed so the remaining
    entries are still processed.
    """
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    series_list = parse_manifest(MANIFEST_FILE)
    print(f"[INFO] Found {len(series_list)} series")

    with NBIAClient() as client:
        for i, uid in enumerate(series_list, start=1):
            print(f"\n[{i}/{len(series_list)}] Processing {uid}")

            try:
                records = client.getSeries(SeriesInstanceUID=uid)

                if records:
                    # Only the first returned record is consulted.
                    record = records[0]

                    target_dir = os.path.join(
                        DOWNLOAD_DIR,
                        sanitize(record.get("PatientID", "UNKNOWN_PATIENT")),
                        sanitize(record.get("StudyInstanceUID", "UNKNOWN_STUDY")),
                        sanitize(uid),
                    )
                    os.makedirs(target_dir, exist_ok=True)

                    client.downloadSeries(
                        SeriesInstanceUID=uid,
                        downloadDir=target_dir,
                        filePattern="%InstanceNumber.dcm",
                        nParallel=N_PARALLEL,
                    )
                else:
                    print(f"[WARNING] No metadata for {uid}, skipping")

            except Exception as e:
                # Best-effort batch: report the failure and move on to the next series.
                print(f"[ERROR] Failed for {uid}: {e}")


# Start the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()