So I asked Claude this question:

And it generated this response:
functions {
  // Matérn 3/2 covariance between two (possibly different) sets of 1-D
  // inputs: k(r) = alpha^2 * (1 + sqrt(3) r / rho) * exp(-sqrt(3) r / rho).
  matrix matern32_cov(array[] real x1, array[] real x2, real alpha, real rho) {
    int N1 = size(x1);
    int N2 = size(x2);
    matrix[N1, N2] K;
    real sqrt3 = sqrt(3.0);
    for (i in 1:N1) {
      for (j in 1:N2) {
        real r = abs(x1[i] - x2[j]);
        real scaled_r = sqrt3 * r / rho;
        K[i, j] = alpha^2 * (1 + scaled_r) * exp(-scaled_r);
      }
    }
    return K;
  }

  // Symmetric version for a single input set: fills the upper triangle and
  // mirrors it, so roughly half the kernel evaluations of matern32_cov.
  matrix matern32_cov_sym(array[] real x, real alpha, real rho) {
    int N = size(x);
    matrix[N, N] K;
    real sqrt3 = sqrt(3.0);
    for (i in 1:N) {
      K[i, i] = alpha^2;  // diagonal: r = 0
      for (j in (i + 1):N) {
        real r = abs(x[i] - x[j]);
        real scaled_r = sqrt3 * r / rho;
        real k_ij = alpha^2 * (1 + scaled_r) * exp(-scaled_r);
        K[i, j] = k_ij;
        K[j, i] = k_ij;
      }
    }
    return K;
  }
}
data {
  int<lower=1> N;             // number of training points
  array[N] real x;            // training inputs
  vector[N] y;                // training outputs
  int<lower=0> N_pred;        // number of prediction points
  array[N_pred] real x_pred;  // prediction inputs
}
transformed data {
  real delta = 1e-9;  // jitter for numerical stability
}
parameters {
  real<lower=0> alpha;  // marginal standard deviation
  real<lower=0> rho;    // length scale
  real<lower=0> sigma;  // noise standard deviation
  real mu;              // constant mean function
}
model {
  matrix[N, N] K = matern32_cov_sym(x, alpha, rho);
  matrix[N, N] L;

  // Add noise variance and jitter to the diagonal.
  for (n in 1:N) {
    K[n, n] += sigma^2 + delta;
  }
  L = cholesky_decompose(K);

  // Priors
  alpha ~ std_normal();
  rho ~ inv_gamma(5, 5);
  sigma ~ std_normal();
  mu ~ normal(0, 2);

  // Likelihood
  y ~ multi_normal_cholesky(rep_vector(mu, N), L);
}
generated quantities {
  vector[N] f_train;       // in-sample latent function
  vector[N] y_train_pred;  // in-sample posterior predictive
  vector[N_pred] f_pred;   // out-of-sample latent function
  vector[N_pred] y_pred;   // out-of-sample posterior predictive
  {
    matrix[N, N] K = matern32_cov_sym(x, alpha, rho);
    matrix[N, N] K_noise = K;
    matrix[N, N] L;
    vector[N] K_inv_y;

    // Build the noisy covariance and factor it ONCE.
    for (n in 1:N) {
      K_noise[n, n] += sigma^2 + delta;
    }
    L = cholesky_decompose(K_noise);

    // Solve K_noise^{-1} (y - mu) with two triangular solves against L.
    // (BUG FIX: the original called cholesky_decompose(K_noise) a second
    // time here instead of reusing L — a redundant O(N^3) factorization.)
    K_inv_y = mdivide_left_tri_low(L, y - mu);
    K_inv_y = L' \ K_inv_y;

    // --- In-sample predictions ---
    {
      vector[N] f_mean = mu + K * K_inv_y;
      matrix[N, N] v = mdivide_left_tri_low(L, K);
      matrix[N, N] f_cov = K - v' * v;
      for (n in 1:N) {
        f_cov[n, n] += delta;  // jitter for sampling stability
      }
      f_train = multi_normal_rng(f_mean, f_cov);
      for (n in 1:N) {
        y_train_pred[n] = normal_rng(f_train[n], sigma);
      }
    }

    // --- Out-of-sample predictions ---
    if (N_pred > 0) {
      matrix[N_pred, N] K_star = matern32_cov(x_pred, x, alpha, rho);
      matrix[N_pred, N_pred] K_star_star = matern32_cov_sym(x_pred, alpha, rho);
      vector[N_pred] f_mean = mu + K_star * K_inv_y;
      matrix[N, N_pred] v = mdivide_left_tri_low(L, K_star');
      matrix[N_pred, N_pred] f_cov = K_star_star - v' * v;
      for (n in 1:N_pred) {
        f_cov[n, n] += delta;  // jitter
      }
      f_pred = multi_normal_rng(f_mean, f_cov);
      for (n in 1:N_pred) {
        y_pred[n] = normal_rng(f_pred[n], sigma);
      }
    }
  }
}
And when I was adding these covariance functions, this is what I was using to test out what I had implemented in C++, to the Stan math library:
functions {
  // Posterior predictive mean of GP regression at new inputs x_pred,
  // conditioning on observations y1 at inputs x.
  //
  // FIXES vs. the original:
  //  - length_scale is now real[] (ARD), matching the array parameter this
  //    function is actually called with (the original declared a scalar,
  //    which would not compile);
  //  - x_pred is vector[] like x, so the cross-covariance call type-checks;
  //  - the diagonal gets square(sigma) (noise VARIANCE), consistent with
  //    the model block, instead of bare sigma;
  //  - the active kernel is gp_exp_quad_cov, matching the kernel active in
  //    transformed parameters (alternatives kept as comments for swapping).
  vector gp_pred_rng(vector[] x_pred, vector y1, vector[] x,
                     real magnitude, real[] length_scale, real sigma) {
    int N = rows(y1);
    int N_pred = size(x_pred);
    vector[N_pred] f2;
    {
      // Keep this kernel in sync with the one used in transformed parameters.
      matrix[N, N] K = add_diag(gp_exp_quad_cov(x, magnitude, length_scale),
                                square(sigma));
      /* matrix[N, N] K = add_diag(gp_matern52_cov(x, magnitude, length_scale), square(sigma)); */
      /* matrix[N, N] K = add_diag(gp_matern32_cov(x, magnitude, length_scale), square(sigma)); */
      /* matrix[N, N] K = add_diag(gp_exponential_cov(x, magnitude, length_scale), square(sigma)); */
      matrix[N, N] L_K = cholesky_decompose(K);
      // K^{-1} y1 via two triangular solves against the Cholesky factor.
      vector[N] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);
      vector[N] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';
      matrix[N, N_pred] k_x_x_pred = gp_exp_quad_cov(x, x_pred, magnitude, length_scale);
      /* matrix[N, N_pred] k_x_x_pred = gp_matern52_cov(x, x_pred, magnitude, length_scale); */
      /* matrix[N, N_pred] k_x_x_pred = gp_matern32_cov(x, x_pred, magnitude, length_scale); */
      /* matrix[N, N_pred] k_x_x_pred = gp_exponential_cov(x, x_pred, magnitude, length_scale); */
      f2 = (k_x_x_pred' * K_div_y1);
    }
    return f2;
  }
}
data {
  int<lower=0> Nobs;         // unused here; kept for data-file compatibility
  int<lower=0> Ncens;        // unused here; kept for data-file compatibility
  int<lower=1> D;            // input dimension
  int<lower=1> N;            // number of training points
  vector[D] x[N];            // N-D GP training inputs
  vector[N] y;               // training outputs
  int<lower=1> N_pred;       // number of prediction points
  vector[D] x_pred[N_pred];  // prediction inputs — same type as x
}
transformed data {
  vector[N] mu = rep_vector(0, N);  // zero mean function
}
parameters {
  real<lower=0> magnitude;
  real<lower=0> length_scale[D];  // one length scale per dimension (ARD)
  real<lower=0> sig;              // unused; kept so posterior output is unchanged
  real<lower=0> sigma;            // observation noise standard deviation
}
transformed parameters {
  matrix[N, N] L_K;
  {
    // Swap in the kernel under test; keep gp_pred_rng in sync.
    matrix[N, N] K = gp_exp_quad_cov(x, magnitude, length_scale);
    /* matrix[N, N] K = gp_matern52_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_matern32_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_exponential_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_periodic_cov(x, 1.0, 1.0, 1.0); */
    /* matrix[N, N] K2 = gp_periodic_cov(x, x, 1.0, 1.0, 1.0); */
    /* matrix[N, N] K = gp_dot_prod_cov(x, 1.0); */
    /* matrix[N, N] K2 = gp_dot_prod_cov(x, x, 1.0); */
    K = add_diag(K, square(sigma));
    L_K = cholesky_decompose(K);
  }
}
model {
  magnitude ~ normal(0, 3);
  length_scale ~ normal(1, 3);
  sig ~ normal(0, 1);
  sigma ~ normal(0, 1);
  y ~ multi_normal_cholesky(mu, L_K);
}
generated quantities {
  vector[N_pred] f_pred = gp_pred_rng(x_pred, y, x, magnitude, length_scale, sigma);
  vector[N_pred] y_pred;
  for (n in 1:N_pred)
    y_pred[n] = normal_rng(f_pred[n], sigma);
}
From a quick look, it doesn’t look totally correct — but see for yourself. I’m not knocking automated code generation, but you still have to know what’s going on. There’s some correct stuff and some word salad, and it isn’t using the Stan functions developed internally by the project’s developers. It reads like a random sample of code scraped from the internet — probably Mike’s code, and some of my own.
Leave a comment