First select an expansion scheme
Here is the Noll expansion scheme:
$$Z_j(\rho,\theta) = Z_n^m(\rho,\theta) =
\begin{cases}
\sqrt{2(n+1)}\, R_n^m(\rho)\cos(m\theta) & \text{if } m \neq 0,\ j \text{ even} \\
\sqrt{2(n+1)}\, R_n^m(\rho)\sin(m\theta) & \text{if } m \neq 0,\ j \text{ odd} \\
\sqrt{n+1}\, R_n^0(\rho) & \text{if } m = 0
\end{cases}$$
where ρ is the radial distance, θ is the azimuthal angle, n is the degree of the radial polynomials (radial order), Rmn(ρ), and m is the azimuthal frequency.
Rmn(ρ) is defined as
$$R_n^m(\rho) = \sum_{k=0}^{(n-m)/2} \frac{(-1)^k\,(n-k)!}{k!\left(\frac{n+m}{2}-k\right)!\left(\frac{n-m}{2}-k\right)!}\;\rho^{\,n-2k},$$
where the sum runs over the integers $k = 0, 1, \ldots, (n-m)/2$.
Here are the “definitions” for the first 21 indices:
noll_desc <- function(j) {
  # Human-readable description of the aberration for Noll index `j` (1..21).
  # `switch()` dispatches positionally when given a number, so coerce to
  # character first: callers pass both numeric (noll_desc(1)) and character
  # (noll_desc(as.character(j))) indices. Returns NULL (invisibly) for
  # indices outside 1..21.
  switch(as.character(j),
    "1" = "piston : bias",
    "2" = "tilt_h : image shift, pattern independent",
    "3" = "tilt_v : image shift, pattern independent",
    "4" = "defocus : degradation",
    "5" = "pri_astigmatism_oblique : focus shift, orientation dependant ",
    "6" = "pri_astigmatism_v : focus shift, orientation dependant",
    "7" = "pri_coma_v : image shift, asymmetry and pattern dependent",
    "8" = "pri_coma_h : image shift, asymmetry and pattern dependent",
    "9" = "pri_trefoil_v : anomalies with threefold symmetry",
    "10" = "pri_trefoil_oblique : anomalies with threefold symmetry",
    "11" = "pri_spherical : focus shift, pattern dependent",
    "12" = "sec_astigmatism_v",
    "13" = "sec_astigmatism_oblique",
    "14" = "pri_quadrafiol_v",
    "15" = "pri_quadrafiol_oblique",
    "16" = "pri_coma_h",
    "17" = "pri_coma_v",
    "18" = "sec_trefoil_oblique",
    "19" = "sec_trefoil_v",
    "20" = "sec_pentafoil_oblique",
    "21" = "sec_pentafoil_v"
    # (trailing comma removed: an empty trailing argument is an error in R)
  )
}
For example, the following prints out the definitions for indices
j = 1, 2:
map(1:2, ~ noll_desc(j = .x))
## [[1]]
## [1] "piston : bias"
##
## [[2]]
## [1] "tilt_h : image shift, pattern independent"
For the Bhatia-Wolf case, the expansion scheme is implemented using the original paper. In both cases, the azimuthal frequency is normalized by default.
The original Cartesian coordinates will be converted to polar coordinates prior to the series expansion.
zernike_expansion <- function(x, y, noll, normalized = TRUE, print_zernikes = TRUE, type = "zernike") {
  # Evaluate one Zernike basis polynomial at the sample points (x, y).
  #
  # x, y           : numeric vectors of Cartesian coordinates (same length).
  # noll           : single Noll index j selecting the polynomial.
  # normalized     : if TRUE (and type == "zernike"), multiply by the
  #                  normalization factor N_n^m.
  # print_zernikes : if TRUE, print a one-line summary (uses str_glue and
  #                  noll_desc from the surrounding script).
  # type           : "zernike" (Noll scheme) or "bhatia_wolf".
  #
  # Returns a numeric vector the same length as x/y.

  # Cartesian -> polar; radius scaled to the unit disk.
  # NOTE(review): assumes max(rho) > 0, i.e. not all points at the origin.
  rho <- sqrt(x^2 + y^2)
  rho <- rho / max(rho)
  theta <- atan2(y, x)
  j <- noll

  # TRUE when i is even.
  .even <- function(i) (i %% 2) == 0

  if (type == "zernike") {
    # Noll index j -> radial order n and azimuthal frequency m.
    n <- as.integer(sqrt(2 * j - 1) + 0.5) - 1
    if (.even(n)) {
      m <- 2 * as.integer((2 * j + 1 - n * (n + 1)) / 4) # even
    } else {
      m <- 2 * as.integer((2 * (j + 1) - n * (n + 1)) / 4) - 1 # odd
    }
    # Radial polynomial R_n^m(rho); base factorial() replaces the
    # hand-rolled recursive helper.
    Rnm <- rep(0, length(rho))
    for (s in 0:((n - m) / 2)) {
      numerator <- (-1)^s * factorial(n - s)
      denominator <- factorial(s) * factorial((n + m) / 2 - s) * factorial((n - m) / 2 - s)
      Rnm <- Rnm + (numerator / denominator) * rho^(n - 2 * s)
    }
    # Angular part selected by j parity (sin for odd j, cos for even j).
    .Pnm <- function(j_det, Rnm) {
      switch(j_det,
        "j" = Rnm,
        "jodd" = Rnm * sin(m * theta),
        "jeven" = Rnm * cos(m * theta)
      )
    }
  } else if (type == "bhatia_wolf") {
    # Bhatia-Wolf index -> (n, m), per the original paper.
    n <- ceiling(sqrt(j) - 1)
    m <- floor((j - ((n - 1)^2 + 2 * (n - 1) + 1)) / 2)
    Rnm <- rep(0, length(rho))
    for (s in 0:((n - m) / 2)) {
      numerator <- (-1)^s * factorial(2 * n + 1 - s)
      denominator <- factorial(s) * factorial(n - m - s) * factorial(n + m + 1 - s)
      Rnm <- Rnm + (numerator / denominator) * rho^(n - s)
    }
    # Normalization is folded into the angular part for this scheme.
    .Pnm <- function(j_det, Rnm) {
      switch(j_det,
        "j" = sqrt(n + 1) * Rnm,
        "jodd" = sqrt(2 * (n + 1)) * Rnm * sin(m * theta),
        "jeven" = sqrt(2 * (n + 1)) * Rnm * cos(m * theta)
      )
    }
  } else {
    # Previously an unknown type fell through to an obscure
    # "object 'm' not found" error; fail fast with a clear message.
    stop("unknown `type`: ", type, call. = FALSE)
  }

  # Scalar selection of the angular branch (replaces nested ifelse()).
  j_det <- if (m > 0) {
    if (.even(j)) "jeven" else "jodd"
  } else {
    "j"
  }

  # Kronecker delta: 1 when m == 0, else 0.
  .delta <- function(m) if (m != 0) 0 else 1
  # N_n^m normalization factor.
  n_factor_z <- function(n, m) sqrt(2 * (n + 1) / (1 + .delta(m)))

  if (print_zernikes) {
    print(str_glue("noll: {j} |> n: {n}, m: {m} | {noll_desc(as.character(j))} | norm_factor: {n_factor_z(n,m) |>round(3)}"))
  }

  # If normalized (zernike type only): return N_n^m * P_n^m, else P_n^m.
  if (type == "zernike" && normalized) {
    n_factor_z(n = n, m = m) * .Pnm(j_det, Rnm)
  } else {
    .Pnm(j_det, Rnm)
  }
}
I simulated two disk surfaces with set parameters — one with a tilt shape and one with a hat shape — each on a 100-point Latin hypercube sampling (LHS) grid. I also added speckled noise to each surface to obscure the shape.
# Load the simulated disk data (a nested tibble: one row per disk id,
# with the (x, y, z) samples in a list-column).
d <- read_rds("d_head.rds")
# Quick look at the raw samples for both disks, colored by height z.
d |>
  unnest() |>
  mutate(z = as.numeric(z)) |>
  ggplot(aes(x, y, label = z, col = z)) +
  geom_point() +
  # guides(col = "none") +
  coord_fixed() +
  facet_wrap(~id) +
  ggtitle("Simulated Data for Two Disks")
Functions for interpolations:
# This function is used later
fit_zernike <- function(Pnm, G, method = "ls") {
  # Least-squares fit of Zernike coefficients.
  #
  # Pnm    : N x k basis matrix (or data frame), one column per moment.
  # G      : length-N vector of observed surface heights.
  # method : only "ls" (ordinary least squares) is implemented.
  #
  # Returns a k x 1 coefficient matrix with row names taken from the
  # columns of Pnm.
  if (method != "ls") {
    stop("unknown `method`: ", method, call. = FALSE)
  }
  Pnm <- as.matrix(Pnm)
  # Normal equations: znm = (P'P)^{-1} P' G, i.e. applying the
  # Moore-Penrose pseudoinverse when Pnm has full column rank.
  # crossprod() avoids forming explicit transposes and solving the
  # linear system beats explicitly inverting (P'P).
  solve(crossprod(Pnm), crossprod(Pnm, G))
}
make_interp_plots <- function(d, ids, moments = c(1, 2, 3, 4, 11), type = "zernike", method = "zernike") {
  # For each disk id: fit the requested Zernike moments to the measured z
  # values, reconstruct the fitted waveform, interpolate it onto a regular
  # grid, and render an interactive plotly surface with the raw samples
  # overlaid as markers. Returns an htmltools tagList of widgets.
  l <- htmltools::tagList()
  for (id in ids) {
    d_id <- d |>
      filter(id == {{ id }}) |>
      unnest(data)
    # Plot window: max sample radius rounded up to the nearest 0.1.
    outer_radius <- reshape::round_any(max(sqrt(d_id$x^2 + d_id$y^2)), 0.1, f = ceiling)
    spatial_res <- 50
    minitics <- seq(-outer_radius, outer_radius, length.out = spatial_res)
    # Zernike
    if (method == "zernike") {
      # Basis matrix -> least-squares coefficients -> reconstructed surface.
      Pnm <- get_Pnm(d_id$x, d_id$y, moments = moments, normalized = TRUE, print_zernikes = FALSE, type = type)
      Znm <- fit_zernike(Pnm = Pnm, G = d_id$z, method = "ls")
      d_id$Wnm <- reconstruct_waveform(Znm = Znm, Pnm = Pnm, method = "slow")
      # Build "Z<j>=<coef>" pairs and join them for the plot subtitle.
      Z_list <- Znm |>
        as_tibble(rownames = "id") |>
        mutate(V1 = round(V1, 4)) |>
        unite("id", id, V1, sep = "=") |>
        pull(id) |>
        paste(collapse = ", ")
      # Interpolate the scattered fitted values onto the regular grid
      # (spline interpolation; duplicate points dropped).
      s <- interp(
        x = d_id$x,
        y = d_id$y,
        z = d_id$Wnm,
        xo = minitics,
        yo = minitics,
        linear = FALSE,
        duplicate = "strip"
      )
      subtitle <- Z_list
    }
    # NOTE(review): `s` and `subtitle` are only created inside the "zernike"
    # branch above, so calling with method = "spline" would fail here with
    # "object 's' not found" — confirm whether a spline branch was dropped.
    if (method %in% c("spline", "zernike")) {
      p <- plot_ly(x = s$x, y = s$y, z = s$z, width = 420, height = 420) |>
        add_surface(
          opacity = 0.7,
          contours = list(
            z = list(
              show = F,
              usecolormap = TRUE,
              highlightcolor = "#ff0000",
              project = list(z = TRUE)
            )
          )
        )
    }
    # Markers use x = ~y, y = ~x — presumably to match the orientation of
    # the interpolated surface; verify the swap is intentional.
    l[[id]] <- p |>
      add_markers(data = d_id, x = ~y, y = ~x, z = ~z, marker = list(size = 2, color = "black")) |>
      layout(title = list(text = paste0(
        "<br>Fit: ", method, "| Id: ", id, "<br>",
        "<sup>",
        subtitle,
        "</sup>"
      ))) |>
      as_widget() |>
      htmltools::tags$div(style = "border: solid; border-color:#e9e9e9; display:inline-block;")
  }
  l
}
reconstruct_waveform <- function(Znm, Pnm, method = "slow") {
  # Reconstruct the fitted surface as the coefficient-weighted sum of the
  # basis columns: W = Pnm %*% Znm.
  #
  # Znm    : k coefficients (vector or k x 1 matrix, e.g. fit_zernike output).
  # Pnm    : N x k basis matrix / data frame, one column per moment.
  # method : kept for interface compatibility; only one path is implemented.
  #
  # Returns an N x 1 matrix (same shape as the original element-wise loop
  # followed by t()). The single matrix product replaces the accumulation
  # loop, avoids shadowing base::sum, and handles k = 0 gracefully where
  # 1:length(Znm) did not.
  as.matrix(Pnm) %*% as.numeric(Znm)
}
Here are the five basis polynomials for each Zernike type.
get_Pnm <- function(x, y, moments, normalized = TRUE, print_zernikes = FALSE, type = "zernike") {
  # Build the basis data frame for a set of moments: one column per
  # requested Noll index, named "Z<j>", evaluated at the points (x, y).
  # All arguments after x/y are forwarded to zernike_expansion().
  #
  # Returns an N x length(moments) data frame (NULL when moments is empty).
  calc_list <- vector("list", length(moments)) # preallocate
  for (i in seq_along(moments)) { # seq_along: safe when moments is empty
    z <- as.data.frame(zernike_expansion(x, y, noll = moments[i], normalized = normalized, print_zernikes = print_zernikes, type = type))
    colnames(z) <- paste0("Z", moments[i])
    calc_list[[i]] <- z
  }
  do.call(cbind, calc_list)
}
visualize_polynomials <- function(d, id, moments = c(1, 2, 3, 4, 5), type = "zernike", print_zernikes = FALSE) {
  # Render one plotly surface per requested moment, showing the basis
  # polynomial evaluated at the disk's (x, y) sample locations, with the
  # sample points (flattened to z = 0) overlaid as markers.
  # Returns an htmltools tagList of widgets.
  l <- htmltools::tagList()
  if (length(id) > 1) stop("cannot compute for more than one id")
  # To map from purrr
  d_id <- d |>
    filter(id == {{ id }}) |>
    unnest(data) |>
    mutate(z = 0)
  # Plot window: max sample radius rounded up to the nearest 0.1.
  outer_radius <- reshape::round_any(max(sqrt(d_id$x^2 + d_id$y^2)), 0.1, f = ceiling)
  spatial_res <- 50
  minitics <- seq(-outer_radius, outer_radius, length.out = spatial_res)
  # One basis column per moment, bound onto the sample table as Z<j>.
  Pnm <- get_Pnm(d_id$x, d_id$y, moments = moments, normalized = TRUE, print_zernikes = print_zernikes, type = type)
  d_id <- bind_cols(d_id, Pnm)
  for (moment in moments) {
    moment <- str_glue("Z{moment}")
    # Interpolate the scattered basis values onto a regular grid.
    s <- interp(
      x = d_id$x,
      y = d_id$y,
      z = d_id |> pull(moment),
      xo = minitics,
      yo = minitics,
      linear = FALSE,
      duplicate = "strip"
    )
    # NOTE(review): in add_markers below, `~x` is passed positionally and so
    # matches the `y` argument (make_interp_plots names it y = ~x); consider
    # naming it explicitly for clarity.
    l[[moment]] <- plot_ly(x = s$x, y = s$y, z = round(s$z, 3), width = 340, height = 340) |> # rounding for colorscale
      add_surface(opacity = 0.7) |>
      add_markers(data = d_id, x = ~y, ~x, z = ~z, marker = list(size = 2, color = "black")) |>
      layout(title = list(text = paste0(
        "<br>", moment, "<br>",
        "<sup>",
        "</sup>"
      ))) |>
      as_widget() |>
      htmltools::tags$div(style = "border: solid; border-color:#e9e9e9; display:inline-block;")
  }
  return(l)
}
Zernikes:
visualize_polynomials(d, 2, moments = c(1, 2, 3, 4, 11), type = "zernike", print_zernikes = T)
## noll: 1 |> n: 0, m: 0 | piston : bias | norm_factor: 1
## noll: 2 |> n: 1, m: 1 | tilt_h : image shift, pattern independent | norm_factor: 2
## noll: 3 |> n: 1, m: 1 | tilt_v : image shift, pattern independent | norm_factor: 2
## noll: 4 |> n: 2, m: 0 | defocus : degradation | norm_factor: 1.732
## noll: 11 |> n: 4, m: 0 | pri_spherical : focus shift, pattern dependent | norm_factor: 2.236
Bhatia-Wolf:
visualize_polynomials(d, 2, moments = c(1, 2, 3, 4, 5), type = "bhatia_wolf", print_zernikes = F)
There are many ways to fit the basis set to the data; for example,
one can run a least squares estimate using the above
fit_zernike() function, or use the built in lm
function. I will be using both methods in this post.
zernike_lm_calc <- function(tbl, moments = c(1, 2, 3, 4, 11), type = "zernike") {
  # Fit the surface z via lm() on the Zernike basis columns.
  #
  # tbl     : data frame with columns x, y, z for one disk.
  # moments : Noll indices of the basis polynomials to fit.
  # type    : expansion scheme, forwarded to get_Pnm().
  #
  # Returns the broom::augment() table: `tbl` plus .fitted, .resid,
  # .se.fit, etc. The original relied on the invisible value of a
  # dangling assignment; return the result explicitly instead.
  Pnm <- get_Pnm(tbl$x, tbl$y, moments = moments, normalized = TRUE, print_zernikes = FALSE, type = type)
  Pnm$z <- tbl$z
  # No intercept (`- 1`): the piston term Z1 already plays that role.
  model_fit <- lm(data = Pnm, formula = z ~ . - 1)
  broom::augment(model_fit, data = tbl, se_fit = TRUE)
}
Here is a Bland-Altman plot to check for systematic disagreement between the two
fits, with 95% limits of agreement (mean difference ± 1.96 SD).
# Fit both expansion schemes to each disk; results (broom::augment tables)
# are stored as nested list-columns, one per scheme.
d <- d |>
  mutate(
    zernike_fit = map(.x = data, .f = ~ zernike_lm_calc(tbl = .x, moments = c(1, 2, 3, 4, 11), type = "zernike")),
    bhatia_wolf_fit = map(.x = data, .f = ~ zernike_lm_calc(tbl = .x, moments = c(1, 2, 3, 4, 5), type = "bhatia_wolf"))
  )
# Long format: one row per sample per expansion type (unused diagnostic
# columns and the other scheme's list-column dropped).
d_zernike <- d |>
  unnest(zernike_fit) |>
  select(-c(.hat, .cooksd, bhatia_wolf_fit)) |>
  mutate(type = "zernike")
d_bw <- d |>
  unnest(bhatia_wolf_fit) |>
  select(-c(.hat, .cooksd, zernike_fit)) |>
  mutate(type = "bhatia_wolf")
d_all <- bind_rows(d_zernike, d_bw)
# Pair the two fitted values per (id, x, y, z) point for the Bland-Altman
# comparison: avg = mean of the two fits, diff = their disagreement.
d_altman <- d_all |>
  select(type, id, x, y, z, .fitted) |>
  ungroup() |>
  pivot_wider(names_from = type, values_from = .fitted) |>
  unnest(zernike, bhatia_wolf) |>
  mutate(
    avg = (zernike + bhatia_wolf) / 2,
    diff = zernike - bhatia_wolf
  )
# Bland-Altman plot: mean difference (blue line) with 1.96*SD limits of
# agreement (red lines); the dashed per-id regressions check for
# proportional bias between the two schemes.
p_alt <- d_altman |>
  ggplot(aes(x = avg, y = diff, col = factor(id))) +
  geom_point(alpha = 0.5) +
  geom_hline(yintercept = mean(d_altman$diff, na.rm = T), colour = "#619CFF", size = 0.5) +
  geom_hline(yintercept = mean(d_altman$diff, na.rm = T) - (1.96 * sd(d_altman$diff, na.rm = T)), colour = "#F8766D", size = 0.5) +
  geom_hline(yintercept = mean(d_altman$diff, na.rm = T) + (1.96 * sd(d_altman$diff, na.rm = T)), colour = "#F8766D", size = 0.5) +
  geom_smooth(method = "lm", se = F, linetype = "dashed", size = 1) +
  labs(
    col = "disk id",
    y = "Zernike - 'Bhatia-Wolf'",
    x = "Average of Zernike and Bhatia-Wolf",
    title = "Altman Plot"
  )
p_alt
There is no systematic difference between the two expansion types.
# Residuals vs fitted values, faceted per disk, to check both fits for
# structure left in the residuals.
p_reg <- d_all |>
  ggplot(aes(.fitted, .resid, col = type)) +
  geom_point(alpha = 0.5) +
  geom_rug(alpha = 0.3) +
  geom_hline(yintercept = 0, alpha = 0.5) +
  facet_wrap(~id, ncol = 1) +
  labs(
    x = "Fitted Value",
    y = "Residual Error",
    col = "Type",
    title = "Residuals vs. Fitted Values by disk id"
  )
p_reg
I see no problems here.
make_interp_plots(d = d, ids = c(1, 2), method = "zernike")
You can see that the tilt and hat shapes are preserved.
The simulated and fitted coefficients are also consistent with each other.
# Ground-truth coefficients used to simulate the two disks (Z2 was not
# part of the simulation, hence NA).
sim_coef <- tibble(
  method = "sim",
  id = c(1, 2),
  Z1 = c(0.1, 0.13),
  Z2 = NA,
  Z3 = c(-0.0005, 0.001),
  Z4 = c(0.001, 0.002),
  Z11 = c(0.002, 0.001)
)
# Coefficients recovered by the least-squares fit (copied from the lm output).
fits_coef <- tibble(
  method = "fitted",
  id = c(1, 2),
  Z1 = c(0.0948271175, 0.1206922959),
  Z2 = c(-0.0006768549, 0.0011152965),
  Z3 = c(-0.0002019438, 0.0008916985),
  Z4 = c(0.0012214048, 0.0021135277),
  Z11 = c(0.0023266929, 0.0012510815)
)
# Plot simulated vs fitted coefficients side by side, one facet per moment.
bind_rows(sim_coef, fits_coef) |>
  pivot_longer(starts_with("Z"), values_to = "coef") |>
  mutate(id = as.factor(id)) |>
  ggplot(aes(method, coef, col = id)) +
  geom_point() +
  facet_wrap(~name, scales = "free") +
  scale_x_discrete(limits = rev) +
  labs(
    y = "Zernike Coefficients",
    x = "",
    col = "Id"
  )
Overfitting would be a concern if we were characterizing a real system. In the real world, gathering system-level data is usually expensive and time-consuming. In that case, one could take a few measurements (how many depends on the problem) and use bootstrapping.