Skip to content

stats

Custom nipype interfaces for FreeSurfer stats commands.

Classes:

Functions:

AparcStats

Bases: FSCommand

Custom nipype interface for FreeSurfer aparcstats2table command.

Methods:

  • run

    Run aparcstats2table command.

run

run(**inputs: Any) -> dict

Run aparcstats2table command.

Source code in src/pyfsviz/stats.py
132
133
134
def run(self, **inputs: Any) -> dict:
    """Run aparcstats2table command."""
    return super().run(**inputs)

AparcStatsInputSpec

Bases: FSTraitedSpec

Input specification for aparcstats2table command.

AparcStatsOutputSpec

Bases: TraitedSpec

Output specification for aparcstats2table command.

AsegStats

Bases: FSCommand

Custom nipype interface for FreeSurfer asegstats2table command.

Methods:

  • run

    Run asegstats2table command.

run

run(**inputs: Any) -> dict

Run asegstats2table command.

Source code in src/pyfsviz/stats.py
60
61
62
def run(self, **inputs: Any) -> dict:
    """Run asegstats2table command."""
    return super().run(**inputs)

AsegStatsInputSpec

Bases: FSTraitedSpec

Input specification for asegstats2table command.

AsegStatsOutputSpec

Bases: TraitedSpec

Output specification for asegstats2table command.

check_metrics

check_metrics(stats_files: list[Path], sd_threshold: float = 3.0) -> dict

Check metrics from stats files.

Parameters:

  • stats_files

    (list) –

    List of paths to stats files

  • sd_threshold

    (float, default: 3.0 ) –

    Standard deviation threshold, by default 3.0

Source code in src/pyfsviz/stats.py
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
def check_metrics(stats_files: list[Path], sd_threshold: float = 3.0) -> dict:
    """Check metrics from stats files.

    Parameters
    ----------
    stats_files : list
        List of paths to stats files
    sd_threshold : float, optional
        Standard deviation threshold, by default 3.0
    """
    metrics: dict[str, pd.DataFrame] = {}
    metric_summary: dict[str, dict[str, dict[str, Any]]] = {}
    for file in stats_files:
        if "combined" in file.stem:
            continue
        df = pd.read_csv(file)
        metrics[file.stem] = df

    for metric, data in metrics.items():
        # Initialize metric_summary for this metric
        metric_summary[metric] = {}

        # Get column names - skip first column (subject_id) and last few columns (typically metadata)
        region_cols = [
            col
            for col in data.columns[1:]
            if col not in ["Measure:volume", "lh.aparc.a2009s_thickness", "rh.aparc.a2009s_thickness"]
        ]
        id_col = data.columns[0]

        for region in region_cols:
            values = data[region].dropna()
            if len(values) == 0:
                metric_summary[metric][region] = {
                    "status": "no_data",
                    "message": "No data available",
                }
            else:
                values = values.astype(float)
                mean = values.mean()
                std_val = values.std()
                upper_bound = mean + sd_threshold * std_val
                lower_bound = mean - sd_threshold * std_val
                outliers = values[(values > upper_bound) | (values < lower_bound)]

                if len(outliers) > 0:
                    outlier_percentage = (len(outliers) / len(values)) * 100
                    outlier_subjects = []
                    for outlier_val in outliers:
                        outlier_rows = data[data[region] == outlier_val]
                        for _, row in outlier_rows.iterrows():
                            subject_id = row[id_col]
                            outlier_subjects.append(
                                {
                                    "subject_id": str(subject_id),
                                    "value": float(outlier_val),
                                },
                            )

                    unique_outliers = []
                    seen = set()
                    for outlier in outlier_subjects:
                        key = (outlier["subject_id"], outlier["value"])
                        if key not in seen:
                            unique_outliers.append(outlier)
                            seen.add(key)

                    metric_summary[metric][region] = {
                        "status": "outliers_detected",
                        "message": f"Found {len(outliers)} outliers ({outlier_percentage:.1f}%) beyond {sd_threshold} SD",
                        "outlier_count": len(outliers),
                        "outlier_percentage": outlier_percentage,
                        "outlier_subjects": unique_outliers,
                        "mean": mean,
                        "std": std_val,
                        "sd_threshold": sd_threshold,
                        "upper_bound": upper_bound,
                        "lower_bound": lower_bound,
                    }
                else:
                    metric_summary[metric][region] = {
                        "status": "passed",
                        "message": f"No outliers detected (mean: {mean:.2f}, ±{sd_threshold} SD: {lower_bound:.2f} to {upper_bound:.2f})",
                        "outlier_count": 0,
                        "outlier_percentage": 0.0,
                        "outlier_subjects": [],
                        "mean": mean,
                        "std": std_val,
                        "sd_threshold": sd_threshold,
                        "upper_bound": upper_bound,
                        "lower_bound": lower_bound,
                    }
    return metric_summary

gen_metric_plots

gen_metric_plots(stats_files: list[Path]) -> list

Generate plots from FreeSurfer stats files.

Parameters:

  • stats_files

    (list[Path]) –

    List of paths to stats files

Returns:

  • list

    List of plotly figure objects

Source code in src/pyfsviz/stats.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
def gen_metric_plots(stats_files: list[Path]) -> list:
    """Generate plots from FreeSurfer stats files.

    Parameters
    ----------
    stats_files: list
        List of paths to stats files

    Returns
    -------
    list
        List of plotly figure objects
    """
    plots = []
    metrics = {}
    for file in stats_files:
        if "lh" in file.stem or "rh" in file.stem:
            continue
        df = pd.read_csv(file)
        metrics[file.stem] = df

    for metric, data in metrics.items():
        idx_col = data.columns[0]
        if "hemi" in data.columns:
            for c in data.columns[1:]:
                fig = go.Figure()
                fig.add_trace(
                    go.Box(
                        y=data[data["hemi"] == "lh"][c],
                        boxpoints="suspectedoutliers",
                        marker={
                            "outliercolor": "rgb(0,0,0)",
                            "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                        },
                        name="lh",
                        text=data["subject_id"],
                    ),
                )
                fig.add_trace(
                    go.Box(
                        y=data[data["hemi"] == "rh"][c],
                        boxpoints="suspectedoutliers",
                        marker={
                            "outliercolor": "rgb(0,0,0)",
                            "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                        },
                        name="rh",
                        text=data["subject_id"],
                    ),
                )
                fig.update_layout(
                    boxmode="group",
                    yaxis={"title": {"text": c}},
                    xaxis={"title": {"text": "hemisphere"}},
                    title={"text": metric},
                )
                plots.append(fig)
        elif any("Left-" in c for c in data.columns):
            region_groups: dict[str, dict[str, str]] = {}
            for region in data.columns[1:]:  # Skip subject_id column
                # Extract base region name (remove hemisphere prefix if present)
                if region.startswith("Left-"):
                    base_region = region[5:]  # Remove 'Left-' prefix
                    hemisphere = "Left"
                elif region.startswith("Right-"):
                    base_region = region[6:]  # Remove 'Right-' prefix
                    hemisphere = "Right"
                elif region.startswith(("lh", "rh")):
                    base_region = region[2:]  # Remove 'lh' or 'rh' prefix
                    hemisphere = "Left" if region.startswith("lh") else "Right"
                else:
                    # No hemisphere prefix, treat as bilateral
                    base_region = region
                    hemisphere = "Bilateral"

                if base_region not in region_groups:
                    region_groups[base_region] = {}
                region_groups[base_region][hemisphere] = region

            for base_region, hemispheres in region_groups.items():
                fig = go.Figure()
                if len(hemispheres) > 1 and "Bilateral" not in hemispheres:
                    # Multiple hemispheres found, create combined plot
                    combined_data = []
                    for hemisphere, region_col in hemispheres.items():
                        region_data = data[[idx_col, region_col]].copy()
                        region_data = region_data.rename(columns={region_col: "value"})
                        region_data["hemisphere"] = hemisphere
                        combined_data.append(region_data)

                    if combined_data:
                        # Concatenate data from both hemispheres
                        plot_data = pd.concat(combined_data, ignore_index=True)

                        # Create box plot comparing hemispheres
                        fig.add_trace(
                            go.Box(
                                y=plot_data[plot_data["hemisphere"] == "Left"]["value"],
                                boxpoints="suspectedoutliers",
                                text=plot_data[idx_col],
                                name="left",
                                marker={
                                    "outliercolor": "rgb(0,0,0)",
                                    "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                                },
                            ),
                        )
                        fig.add_trace(
                            go.Box(
                                y=plot_data[plot_data["hemisphere"] == "Right"]["value"],
                                boxpoints="suspectedoutliers",
                                text=plot_data[idx_col],
                                name="right",
                                marker={
                                    "outliercolor": "rgb(0,0,0)",
                                    "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                                },
                            ),
                        )
                        fig.update_layout(
                            boxmode="group",
                            yaxis={"title": {"text": base_region}},
                            xaxis={"title": {"text": "hemisphere"}},
                        )
                        plots.append(fig)
                else:
                    region_col = next(iter(hemispheres.values()))
                    fig.add_trace(
                        go.Box(
                            y=data[region_col],
                            boxpoints="suspectedoutliers",
                            text=data[idx_col],
                            name=base_region,
                            marker={
                                "outliercolor": "rgb(0,0,0)",
                                "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                            },
                        ),
                    )
                    fig.update_layout(yaxis={"title": {"text": base_region}})
                    plots.append(fig)
        else:
            # Handle aseg data (no hemisphere prefix)
            for region in data.columns[1:]:  # Skip subject_id column
                fig = go.Figure()
                fig.add_trace(
                    go.Box(
                        y=data[region],
                        boxpoints="suspectedoutliers",
                        text=data[idx_col],  # subject_id column
                        name=region,
                        marker={
                            "outliercolor": "rgb(0,0,0)",
                            "line": {"outlierwidth": 1, "outliercolor": "rgb(0,0,0)"},
                        },
                    ),
                )
                fig.update_layout(
                    yaxis={"title": {"text": region}},
                    title={"text": metric},
                )
                plots.append(fig)

    return plots

get_stats

get_stats(subjects: list[str], output_dir: str, measures: list[str] | None = None, hemis: list[str] | None = None) -> dict[str, Path | list[Path]]

Get aseg and aparc stats from subjects.

Parameters:

  • subjects

    (list) –

    List of subject IDs

  • output_dir

    (str) –
  • measures

    (list, default: None ) –

    List of measures to get, by default None

  • hemis

    (list, default: None ) –

    List of hemispheres to get, by default None

Source code in src/pyfsviz/stats.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def get_stats(
    subjects: list[str],
    output_dir: str,
    measures: list[str] | None = None,
    hemis: list[str] | None = None,
) -> dict[str, Path | list[Path]]:
    """Get aseg and aparc stats from subjects.

    Parameters
    ----------
    subjects : list
        List of subject IDs
    output_dir : str
    measures : list, optional
        List of measures to get, by default None
    hemis : list, optional
        List of hemispheres to get, by default None
    """
    stats: dict[str, Path | list[Path]] = {}
    stats["aseg"] = _get_aseg_stats(subjects, "aseg.csv", output_dir=output_dir)
    stats["aparc"] = _get_aparc_stats(subjects, "aparc.csv", output_dir=output_dir, measures=measures, hemis=hemis)
    return stats