Me vs. Claude

So I asked Claude this question:

And it generated this response:

functions {
  // Matérn 3/2 covariance function
  matrix matern32_cov(array[] real x1, array[] real x2, real alpha, real rho) {
    int N1 = size(x1);
    int N2 = size(x2);
    matrix[N1, N2] K;
    real sqrt3 = sqrt(3.0);
    for (i in 1:N1) {
      for (j in 1:N2) {
        real r = abs(x1[i] - x2[j]);
        real scaled_r = sqrt3 * r / rho;
        K[i, j] = alpha^2 * (1 + scaled_r) * exp(-scaled_r);
      }
    }
    return K;
  }

  // Squared version for symmetric case (more efficient)
  matrix matern32_cov_sym(array[] real x, real alpha, real rho) {
    int N = size(x);
    matrix[N, N] K;
    real sqrt3 = sqrt(3.0);
    for (i in 1:N) {
      K[i, i] = alpha^2;  // diagonal
      for (j in (i + 1):N) {
        real r = abs(x[i] - x[j]);
        real scaled_r = sqrt3 * r / rho;
        real k_ij = alpha^2 * (1 + scaled_r) * exp(-scaled_r);
        K[i, j] = k_ij;
        K[j, i] = k_ij;
      }
    }
    return K;
  }
}
data {
  int<lower=1> N;             // number of training points
  array[N] real x;            // training inputs
  vector[N] y;                // training outputs
  int<lower=0> N_pred;        // number of prediction points
  array[N_pred] real x_pred;  // prediction inputs
}
transformed data {
  real delta = 1e-9;  // jitter for numerical stability
}
parameters {
  real<lower=0> alpha;  // marginal standard deviation
  real<lower=0> rho;    // length scale
  real<lower=0> sigma;  // noise standard deviation
  real mu;              // mean function (constant)
}
model {
  matrix[N, N] K = matern32_cov_sym(x, alpha, rho);
  matrix[N, N] L;
  // Add noise and jitter to diagonal
  for (n in 1:N) {
    K[n, n] += sigma^2 + delta;
  }
  L = cholesky_decompose(K);
  // Priors
  alpha ~ std_normal();
  rho ~ inv_gamma(5, 5);
  sigma ~ std_normal();
  mu ~ normal(0, 2);
  // Likelihood
  y ~ multi_normal_cholesky(rep_vector(mu, N), L);
}
generated quantities {
  vector[N] f_train;       // in-sample latent function
  vector[N] y_train_pred;  // in-sample posterior predictive
  vector[N_pred] f_pred;   // out-of-sample latent function
  vector[N_pred] y_pred;   // out-of-sample posterior predictive
  {
    matrix[N, N] K = matern32_cov_sym(x, alpha, rho);
    matrix[N, N] K_noise;
    matrix[N, N] L;
    vector[N] K_inv_y;
    // Build noisy covariance and decompose
    K_noise = K;
    for (n in 1:N) {
      K_noise[n, n] += sigma^2 + delta;
    }
    L = cholesky_decompose(K_noise);
    K_inv_y = cholesky_decompose(K_noise) \ (y - mu);
    K_inv_y = L' \ K_inv_y;
    // --- In-sample predictions ---
    {
      vector[N] f_mean = mu + K * K_inv_y;
      matrix[N, N] v = mdivide_left_tri_low(L, K);
      matrix[N, N] f_cov = K - v' * v;
      // Add jitter for sampling stability
      for (n in 1:N) {
        f_cov[n, n] += delta;
      }
      f_train = multi_normal_rng(f_mean, f_cov);
      for (n in 1:N) {
        y_train_pred[n] = normal_rng(f_train[n], sigma);
      }
    }
    // --- Out-of-sample predictions ---
    if (N_pred > 0) {
      matrix[N_pred, N] K_star = matern32_cov(x_pred, x, alpha, rho);
      matrix[N_pred, N_pred] K_star_star = matern32_cov_sym(x_pred, alpha, rho);
      vector[N_pred] f_mean = mu + K_star * K_inv_y;
      matrix[N, N_pred] v = mdivide_left_tri_low(L, K_star');
      matrix[N_pred, N_pred] f_cov = K_star_star - v' * v;
      // Add jitter
      for (n in 1:N_pred) {
        f_cov[n, n] += delta;
      }
      f_pred = multi_normal_rng(f_mean, f_cov);
      for (n in 1:N_pred) {
        y_pred[n] = normal_rng(f_pred[n], sigma);
      }
    }
  }
}
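For reference, the kernel the generated code hand-rolls is the standard Matérn 3/2 covariance, and (modulo the jitter terms) its generated quantities block computes the usual GP posterior predictive:

\[
k(x_i, x_j) = \alpha^2 \left(1 + \frac{\sqrt{3}\,|x_i - x_j|}{\rho}\right) \exp\left(-\frac{\sqrt{3}\,|x_i - x_j|}{\rho}\right)
\]

\[
\mathbf{f}_* \mid \mathbf{y} \sim \mathcal{N}\!\left(\mu + K_* (K + \sigma^2 I)^{-1} (\mathbf{y} - \mu),\; K_{**} - K_* (K + \sigma^2 I)^{-1} K_*^\top\right)
\]

So the math itself checks out; it's the bookkeeping where you want to read carefully (note, for instance, that K_inv_y calls cholesky_decompose(K_noise) a second time when L already holds that factor).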

And when I was adding these covariance functions to the Stan math library, this is what I was using to test out what I had implemented in C++:

functions {
  vector gp_pred_rng(real[] x_pred,
                     vector y1, vector[] x,
                     real magnitude, real length_scale,
                     real sigma) {
    int N = rows(y1);
    int N_pred = size(x_pred);
    vector[N_pred] f2;
    {
      /* matrix[N, N] K = add_diag(gp_exp_quad_cov(x, magnitude, length_scale), */
      /*                           sigma); */
      /* matrix[N, N] K = add_diag(gp_matern52_cov(x, magnitude, length_scale), */
      /*                           sigma); */
      matrix[N, N] K = add_diag(gp_matern32_cov(x, magnitude, length_scale),
                                sigma);
      /* matrix[N, N] K = add_diag(gp_exponential_cov(x, magnitude, length_scale), */
      /*                           sigma); */
      matrix[N, N] L_K = cholesky_decompose(K);
      vector[N] L_K_div_y1 = mdivide_left_tri_low(L_K, y1);
      vector[N] K_div_y1 = mdivide_right_tri_low(L_K_div_y1', L_K)';
      /* matrix[N, N_pred] k_x_x_pred = gp_exp_quad_cov(x, x_pred, magnitude, length_scale); */
      /* matrix[N, N_pred] k_x_x_pred = gp_matern52_cov(x, x_pred, magnitude, length_scale); */
      matrix[N, N_pred] k_x_x_pred = gp_matern32_cov(x, x_pred, magnitude, length_scale);
      /* matrix[N, N_pred] k_x_x_pred = gp_exponential_cov(x, x_pred, magnitude, length_scale); */
      f2 = (k_x_x_pred' * K_div_y1);
    }
    return f2;
  }
}
data {
  int<lower=0> Nobs;
  int<lower=0> Ncens;
  int<lower=1> D;
  int<lower=1> N;
  vector[D] x[N];  // N-D GP
  // real x[N];
  vector[N] y;
  int<lower=1> N_pred;
  // vector[D] x_pred[N_pred];
  real x_pred[N_pred];
}
transformed data {
  vector[N] mu;
  mu = rep_vector(0, N);
}
parameters {
  real<lower=0> magnitude;
  real<lower=0> length_scale[D];
  real<lower=0> sig;
  real<lower=0> sigma;
}
transformed parameters {
  matrix[N, N] L_K;
  {
    matrix[N, N] K = gp_exp_quad_cov(x, magnitude, length_scale);
    /* matrix[N, N] K = gp_matern52_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_matern32_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_exponential_cov(x, magnitude, length_scale); */
    /* matrix[N, N] K = gp_periodic_cov(x, 1.0, 1.0, 1.0); */
    /* matrix[N, N] K2 = gp_periodic_cov(x, x, 1.0, 1.0, 1.0); */
    /* matrix[N, N] K = gp_dot_prod_cov(x, 1.0); */
    /* matrix[N, N] K2 = gp_dot_prod_cov(x, x, 1.0); */
    K = add_diag(K, square(sigma));
    L_K = cholesky_decompose(K);
  }
}
model {
  magnitude ~ normal(0, 3);
  length_scale ~ normal(1, 3);
  sig ~ normal(0, 1);
  sigma ~ normal(0, 1);
  y ~ multi_normal_cholesky(mu, L_K);
}
generated quantities {
  vector[N_pred] f_pred = gp_pred_rng(x_pred, y, x, magnitude, length_scale, sigma);
  vector[N_pred] y_pred;
  for (n in 1:N_pred) y_pred[n] = normal_rng(f_pred[n], sigma);
}

From a quick look, it doesn't look totally correct. But see for yourself. I'm not knocking automated code generation, but you still have to know what's going on. There's some correct stuff, some word salad, and it isn't using the internally developed Stan functions, the ones the developers actually exposed in the language. It reads like a random sample of code scraped from the internet: probably Mike's code, and some of my code.
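For comparison, here's a minimal sketch of what the same model looks like using the built-in kernels. This is untested; I've carried the priors over from the generated model, and the 1e-9 jitter constant is my choice:

data {
  int<lower=1> N;
  array[N] real x;
  vector[N] y;
}
parameters {
  real<lower=0> alpha;  // marginal standard deviation
  real<lower=0> rho;    // length scale
  real<lower=0> sigma;  // noise standard deviation
  real mu;              // constant mean
}
model {
  // Built-in Matérn 3/2 kernel; observation noise plus jitter on the diagonal
  matrix[N, N] L_K = cholesky_decompose(
      add_diag(gp_matern32_cov(x, alpha, rho), square(sigma) + 1e-9));
  alpha ~ std_normal();
  rho ~ inv_gamma(5, 5);
  sigma ~ std_normal();
  mu ~ normal(0, 2);
  y ~ multi_normal_cholesky(rep_vector(mu, N), L_K);
}

One call to gp_matern32_cov replaces the hand-rolled double loop, and it dispatches to the C++ implementation in the Stan math library, the same code paths the test program above was written to exercise.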
