can also use the splines
package for spline models
library(splines)
# illustrative dataset
dat <- structure(list(xi = c(3.33, 9.54, 0.46, 1.35, 2.08, 7.79, 9.28, 3.97, 8.04, 8.4, 1.36, 8.4, 1.91, 3.85, 5.09, 1.8, 7.84, 3.38, 3.82, 2.4, 4.88, 6.95, 9.36, 1.18, 1.45, 6.86, 8.19, 3.94, 8.33, 4.75, 9.62, 1.02, 5.24, 8.96, 2.79, 0.43, 3.9, 6.13, 1.18, 8.89, 7.43, 7.88, 5.51, 5.73, 4.58, 1.46, 1.61, 8.52, 6.23, 4.51, 5.62, 8.72, 5.07, 9.13, 0.74, 7.67, 9.19, 0.24, 6.36, 2.23, 9.46, 3.57, 7.09, 4.67, 7.97, 1.68, 6.91, 3.02, 7.83, 6.84, 9.11, 3.66, 1.04, 5.62, 6.38, 7.44, 4.66, 0.47, 5.54, 0.99, 6.7, 2.03, 6.94, 9.49, 9.16, 3.13, 2.2, 3.53, 6.58, 5.88, 5.98, 8.81, 4.74, 2.63, 6.78, 3.09, 6.72, 2.14, 0.27, 8.93, 3.59, 0.59, 1.91, 2.47, 8.53, 6.45, 2.44, 6.38, 6.76, 3.41, 9.46, 1.77, 0.91, 7.16, 5.95, 7.88, 5.33, 2.17, 6.33, 5.94, 6.85, 0.62, 4.92, 1.2, 0.76, 4.5, 5.73, 4.24, 0.99, 6.7, 4.27, 0.91, 6.43, 0.9, 1.21, 9.04, 2.8, 9.35, 8.06, 7.96, 0.36, 1.3, 2.46, 0.73, 1.68, 9.35, 5.91, 6.74, 0.32, 3.43, 1.78, 9.01, 8.23, 0.57, 8.22, 2.42, 7.35, 2.61, 7.12, 2.21, 8.23, 2.48, 5.04, 5.16, 5.29, 4.27, 1.17, 7.52, 9.16, 2.56, 2.74, 8.1, 1.55, 6.12, 8.12, 4.74, 6.39, 5.64, 8.9, 2.5, 7.91, 9.33, 8.29, 4.62, 5.33, 3.96, 1.41, 6.49, 0.39, 8.22, 3.85, 4.55, 7.07, 4.53, 6.29, 0.75, 1.38, 8.22, 6.1, 1.97, 3.23, 5.8, 5.16, 8.27, 8.73, 7.46, 6.76, 8.25, 3.16, 1.77, 8.98, 3.24, 6.58, 4.15, 2.11, 5.24, 5.45, 6.1, 8.77, 0.77, 6.48, 9.44, 9.58, 6.24, 2.66, 7.34, 5.88, 7.37, 6.26, 6.86, 4.2, 5.75, 1.77, 7.53, 3.32, 6.19, 5.41, 7.1, 4.58, 5.92, 4.8, 1.17, 3.5, 4.82, 5.85, 0.84, 1.15, 5.38, 2.94, 6.68), yi = c(2.28, 5.86, 2.47, 1.79, 2.16, 6.41, 6.19, 2.89, 5.76, 6.28, 2.37, 5.66, 2.17, 2.47, 4.31, 1.57, 5.62, 2.58, 3.12, 2.05, 4.38, 5.62, 5.83, 1.97, 2.44, 5.75, 5.45, 2.27, 5.9, 3.91, 5.86, 1.7, 4.27, 5.91, 2.36, 1.85, 2.54, 5.74, 2.4, 5.92, 5.53, 5.9, 4.47, 4.75, 3.34, 2.02, 2.1, 5.89, 5.01, 3.42, 4.86, 5.89, 3.9, 6.43, 2.36, 6.37, 5.54, 2.46, 4.87, 2.43, 5.97, 3.35, 5.51, 3.36, 5.96, 2.08, 5.95, 2.79, 5.73, 5.81, 6.15, 2.71, 1.94, 4.95, 4.63, 5.63, 4.06, 2.29, 4.21, 2.15, 5.35, 2.08, 5.86, 6.45, 5.95, 2.33, 2.35, 2.81, 5.46, 5.43, 4.67, 6.5, 3.66, 2.76, 5.38, 2.63, 5.6, 1.58, 2.11, 5.86, 2.97, 1.93, 1.75, 2.69, 6.38, 5.09, 2.19, 6.22, 5.45, 2.82, 5.56, 2.45, 2.07, 5.53, 5.13, 5.32, 4.06, 1.94, 4.85, 4.71, 5.55, 2.58, 3.95, 1.99, 1.59, 3.69, 4.74, 2.87, 2.24, 5.99, 3.14, 1.44, 5.42, 1.73, 2.01, 5.59, 2.26, 5.92, 5.71, 5.68, 2.52, 2.1, 2.24, 1.78, 1.86, 5.8, 5.06, 5.29, 1.62, 3.09, 2.69, 6.02, 5.85, 2.01, 6.21, 2.36, 5.32, 1.91, 5.74, 2.86, 5.51, 2.37, 4.19, 4.31, 4.35, 3.05, 1.85, 5.72, 6.4, 2.1, 2.13, 5.94, 2.21, 5.71, 6.12, 3.74, 4.78, 5.33, 5.75, 2.18, 5.66, 6.39, 6.2, 3.29, 4.25, 2.91, 1.86, 5.79, 2, 5.65, 3.15, 3.46, 5.18, 3.5, 5.58, 2.26, 1.93, 5.66, 5.19, 2.32, 2.44, 5.31, 3.93, 5.57, 5.77, 5.59, 5.84, 5.99, 2.12, 2.53, 6.57, 2.63, 5.23, 3.1, 2.6, 4.17, 4.53, 4.89, 5.89, 2.08, 5.37, 5.85, 5.67, 5.06, 2.3, 5.66, 4.95, 5.69, 5.43, 5.49, 3.3, 4.58, 1.58, 5.55, 2.88, 4.56, 4.86, 5.7, 3.13, 4.94, 4.19, 1.88, 2.38, 3.6, 5.2, 1.95, 1.98, 4.62, 2.23, 5.27 )), class = "data.frame", row.names = c(NA, -250L))
# plot xi versus yi
plot(dat$xi, dat$yi, pch=21, col="gray40", bg="gray80", xlab="Variable x", ylab="Variable y", xlim=c(0,10), ylim=c(1,7))
# fit cubic spline model with 4 knots (two at the lower and upper end of xi)
res <- lm(yi ~ bs(xi, df=5), data=dat) # can also try ns()
# get the knot positions that were used in the model
knots <- sort(c(attr(res$terms, "predvars")[[3]][["knots"]], attr(res$terms, "predvars")[[3]][["Boundary.knots"]]))
knots
## 33.33333% 66.66667%
## 0.24 3.43 6.58 9.62
# show knots in the scatterplot
abline(v=knots, lty="dotted")
# compute predicted values based on the model (and 95% CI for the predicted values)
xs <- seq(min(dat$xi), max(dat$xi), length=1001)
sav <- predict(res, newdata = data.frame(xi=xs), interval="confidence")
sav <- data.frame(sav)
polygon(c(xs,rev(xs)), c(sav$lwr,rev(sav$upr)), col=rgb(0,0,1,.3), border=NA)
lines(xs, sav$fit, lwd=3)
# add the prediction interval
sav <- predict(res, newdata = data.frame(xi=xs), interval="prediction")
sav <- data.frame(sav)
polygon(c(xs,rev(xs)), c(sav$lwr,rev(sav$upr)), col=rgb(0,0,0,.05), border=NA)