Post by Lasagna and tears of failure on Oct 29, 2020 14:53:12 GMT -8
To run it, you first have to install R.
All of this is for Windows; for anything else, if you can't find it, let me know and I'll find the links for you.
mirror.las.iastate.edu/CRAN/
(download hyper mirror.las.iastate.edu/CRAN/bin/windows/base/R-4.0.3-win.exe )
Once you have installed that, open it and install the rvest package: choose "install package".
You will get a HUGE list; scroll down to rvest and double-click on it.
Download and install R Studio
rstudio.com/products/rstudio/download/#download
Open it, paste this code, and hit Enter.
Paste this at the ">" prompt.
Substitute the player name and team, with day numbers for start and end. In my testing, ranges much over 10 days time out, because web servers close connections after a certain amount of time.
An example for a whole team:
All of this is for Windows; for anything else, if you can't find it, let me know and I'll find the links for you.
mirror.las.iastate.edu/CRAN/
(download hyper mirror.las.iastate.edu/CRAN/bin/windows/base/R-4.0.3-win.exe )
Once you have installed that, open it and install the rvest package: choose "install package".
You will get a HUGE list; scroll down to rvest and double-click on it.
Download and install R Studio
rstudio.com/products/rstudio/download/#download
Open it, paste this code, and hit Enter:
# ---- Session setup ----
# Removes every object from the current workspace, including anything the
# user defined earlier in the session, before the script defines its own.
rm(list = ls())
# Keep strings as character vectors (not factors) when data frames are built.
options(stringsAsFactors = FALSE)
# Print numbers with 3 significant digits by default.
options(digits = 3)
# Attach rvest; the functions in this file call read_html(), html_nodes(),
# html_attr() and html_table() without a namespace prefix.
library('rvest')
# Base URL of the site being scraped; the roster and box-score URLs used by
# the functions in this file are built by pasting onto it.
path <- "https://bballsim2020.com/"
# ADD YEAR PLUS / FOR PAST SEASONS
# NOTE(review): presumably this means appending "<year>/" to `path` to read
# an archived season -- confirm against the site's layout.
# Team names. A team's position in this vector is the number inserted into
# its schedule URL ("rosters/roster<N>sched.htm") by the scraping functions.
teams <- c("Celtics", "Heat", "Nets", "Knicks", "Magic", "76ers", "Wizards",
"Hawks", "Hornets", "Bulls", "Cavaliers", "Pistons", "Pacers",
"Bucks", "Raptors", "Mavericks", "Nuggets", "Rockets",
"Timberwolves", "Spurs", "Jazz", "Grizzlies", "Warriors",
"Clippers", "Lakers", "Suns", "Trail Blazers", "Kings",
"Super Sonics")
# Not referenced by any function visible in this section of the file.
output <- c("Bob")
# Maximum of each column/element of `data`, ignoring NAs. Not called by any
# function visible in this section of the file.
colMax <- function(data) sapply(data, max, na.rm = TRUE)
#' Print a player's averages, shooting percentages and totals
#'
#' For each team in `team`, reads that team's schedule page, opens every
#' linked box score whose day number lies in `[start, end]`, and collects the
#' row of the team's table whose first column equals `player`. Then prints
#' per-game averages (optionally rescaled per 36), FG / 3P / FT percentages
#' and point / rebound / steal / block totals.
#'
#' Depends on the file-level globals `path` and `teams` and on rvest being
#' attached.
#'
#' @param player Player name exactly as it appears in the first column of
#'   the box score table.
#' @param team Team name(s); each must be an element of `teams`.
#' @param start,end First and last day number (inclusive) to include.
#' @param per36 Pass 1 to additionally print the averages rescaled so that
#'   the second entry equals 36; any other value skips that section.
#' @return Invisibly, the printed totals vector, or `NULL` if no matching
#'   rows were found.
get_player_stats <- function(player, team, start = 1, end = 200, per36 = 2) {
  rows <- list()
  for (tm in team) {
    # The team's position in `teams` is the roster number used in the URL.
    team_no <- match(tm, teams)
    if (is.na(team_no)) {
      stop("Unknown team: ", tm, call. = FALSE)
    }
    sched_url <- paste0(path, "/rosters/roster", team_no, "sched.htm")
    links <- html_attr(html_nodes(read_html(sched_url), "a"), "href")
    # The first nine links and the last one are skipped, as in the original
    # script (presumably page navigation -- confirm against the site).
    if (length(links) <= 10) {
      next
    }
    links <- links[10:(length(links) - 1)]
    for (link in links) {
      if (is.na(link)) {
        next
      }
      # Drop the first two characters of the href before appending to `path`.
      box_url <- paste0(path, substr(link, 3, nchar(link)))
      # The day number is the text between "boxes/" and the following "-".
      m <- regmatches(box_url, regexec("boxes/([^-]*)-", box_url))[[1]]
      if (length(m) < 2) {
        next
      }
      day <- suppressWarnings(as.numeric(m[2]))
      # Links that are not box scores yield NA and are skipped rather than
      # aborting the whole run.
      if (is.na(day) || day < start || day > end) {
        next
      }
      tables <- html_table(read_html(box_url), header = TRUE, fill = TRUE)
      if (length(tables) < 3) {
        next
      }
      # Tables 2 and 3 hold the two teams; a table's first column header is
      # the team name.
      own_idx <- if (isTRUE(colnames(tables[[2]])[1] == tm)) 2 else 3
      box <- tables[[own_idx]]
      if (!isTRUE(colnames(box)[1] == tm)) {
        next
      }
      # Drop the last two rows and the last column.
      box_team <- box[seq_len(max(nrow(box) - 2, 0)), seq_len(ncol(box) - 1)]
      box_team[, 3:16] <- lapply(box_team[, 3:16], as.numeric)
      colnames(box_team)[6:7] <- c("X3P", "X3PA")
      hit <- which(box_team[[1]] == player)
      if (length(hit) > 0) {
        colnames(box_team)[1] <- "Team"
        rows[[length(rows) + 1]] <- box_team[hit, ]
      }
    }
  }
  # rbind() over an empty list gives NULL, which is the "nothing found" case.
  stats <- do.call(rbind, rows)
  if (is.null(stats) || nrow(stats) == 0) {
    cat("No stats found.\n")
    return(invisible(NULL))
  }
  stats <- cbind.data.frame(GM = nrow(stats), stats[, 3:16])
  cat("AVERAGES\n")
  averages <- colMeans(stats)
  print(round(averages, digits = 1))
  if (per36 == 1) {
    cat("PER 36\n")
    # Scale everything except GM by 36 / (second entry); the second entry is
    # assumed to be minutes, as the "PER 36" label implies.
    averages[-1] <- averages[-1] * (36 / averages[[2]])
    print(round(averages, digits = 1))
  }
  cat("PERCENTAGES\n")
  print(c(sum(stats$FG) / sum(stats$FGA), sum(stats$X3P) / sum(stats$X3PA),
          sum(stats$FT) / sum(stats$FTA)))
  cat("Totals\n")
  cat("points boards steals blocks\n")
  # Points = 2 * FG + 3P made + FT made.
  print(c(sum(stats$FG) * 2 + sum(stats$X3P) + sum(stats$FT),
          sum(stats$REB), sum(stats$ST), sum(stats$BL)))
}
#' Print a player's totals and derived efficiency numbers
#'
#' Collects the same box score rows as `get_player_stats()` (the row whose
#' first column equals `player`, from every box score of `team` whose day
#' number lies in `[start, end]`) and prints one unnamed numeric vector, in
#' this order: MIN, points, REB, ST, BL, TO, FG, FGA, FT, FTA, 3P, 3PA,
#' `tsper`, `rcemr`, `bihp`, where
#' * points = 2 * FG + 3P + FT
#' * tsper  = points / (2 * (FGA + 0.44 * FTA))
#' * stocks = 2 * ST/MIN + BL/MIN + (REB/MIN) / 2 + (points/MIN) / 3
#'            - 2 * TO/MIN
#' * rcemr  = (1.3 * tsper + stocks) * 25
#' * bihp   = points / (FGA + FTA / 2.2)
#'
#' Depends on the file-level globals `path` and `teams` and on rvest being
#' attached.
#'
#' @param player Player name exactly as it appears in the first column of
#'   the box score table.
#' @param team Team name(s); each must be an element of `teams`.
#' @param start,end First and last day number (inclusive) to include.
#' @param per36 Unused; kept so the signature matches `get_player_stats()`.
#' @return Invisibly, the printed vector, or `NULL` if no matching rows were
#'   found.
get_player_tot <- function(player, team, start = 1, end = 200, per36 = 2) {
  rows <- list()
  for (tm in team) {
    # The team's position in `teams` is the roster number used in the URL.
    team_no <- match(tm, teams)
    if (is.na(team_no)) {
      stop("Unknown team: ", tm, call. = FALSE)
    }
    sched_url <- paste0(path, "/rosters/roster", team_no, "sched.htm")
    links <- html_attr(html_nodes(read_html(sched_url), "a"), "href")
    # The first nine links and the last one are skipped, as in the original
    # script (presumably page navigation -- confirm against the site).
    if (length(links) <= 10) {
      next
    }
    links <- links[10:(length(links) - 1)]
    for (link in links) {
      if (is.na(link)) {
        next
      }
      # Drop the first two characters of the href before appending to `path`.
      box_url <- paste0(path, substr(link, 3, nchar(link)))
      # The day number is the text between "boxes/" and the following "-".
      m <- regmatches(box_url, regexec("boxes/([^-]*)-", box_url))[[1]]
      if (length(m) < 2) {
        next
      }
      day <- suppressWarnings(as.numeric(m[2]))
      # Links that are not box scores yield NA and are skipped rather than
      # aborting the whole run.
      if (is.na(day) || day < start || day > end) {
        next
      }
      tables <- html_table(read_html(box_url), header = TRUE, fill = TRUE)
      if (length(tables) < 3) {
        next
      }
      # Tables 2 and 3 hold the two teams; a table's first column header is
      # the team name.
      own_idx <- if (isTRUE(colnames(tables[[2]])[1] == tm)) 2 else 3
      box <- tables[[own_idx]]
      if (!isTRUE(colnames(box)[1] == tm)) {
        next
      }
      # Drop the last two rows and the last column.
      box_team <- box[seq_len(max(nrow(box) - 2, 0)), seq_len(ncol(box) - 1)]
      box_team[, 3:16] <- lapply(box_team[, 3:16], as.numeric)
      colnames(box_team)[6:7] <- c("X3P", "X3PA")
      hit <- which(box_team[[1]] == player)
      if (length(hit) > 0) {
        colnames(box_team)[1] <- "Team"
        rows[[length(rows) + 1]] <- box_team[hit, ]
      }
    }
  }
  # rbind() over an empty list gives NULL, which is the "nothing found" case.
  stats <- do.call(rbind, rows)
  if (is.null(stats) || nrow(stats) == 0) {
    cat("No stats found.\n")
    return(invisible(NULL))
  }
  stats <- cbind.data.frame(GM = nrow(stats), stats[, 3:16])
  minutes <- sum(stats$MIN)
  pts <- sum(stats$FG) * 2 + sum(stats$X3P) + sum(stats$FT)
  tsper <- pts / (2 * (sum(stats$FGA) + 0.44 * sum(stats$FTA)))
  stocks <- (sum(stats$ST) / minutes * 2) +
    (sum(stats$BL) / minutes) +
    ((sum(stats$REB) / minutes) / 2.0) +
    ((pts / minutes) / 3.0) -
    (sum(stats$TO) / minutes * 2)
  rcemr <- ((tsper * 1.3) + stocks) * 25
  bihp <- pts / (sum(stats$FGA) + (sum(stats$FTA) / 2.2))
  print(c(minutes, pts, sum(stats$REB), sum(stats$ST), sum(stats$BL),
          sum(stats$TO), sum(stats$FG), sum(stats$FGA), sum(stats$FT),
          sum(stats$FTA), sum(stats$X3P), sum(stats$X3PA),
          tsper, rcemr, bihp))
}
#' Print and return a team's (or its opponents') per-game averages
#'
#' For each team in `team`, reads that team's schedule page, opens every
#' linked box score whose day number lies in `[start, end]`, and collects the
#' "TEAM TOTALS" row of either the team's own table or the opponent's table.
#' Prints averages, FG / 3P / FT percentages and point / rebound / steal /
#' block totals.
#'
#' Depends on the file-level globals `path` and `teams` and on rvest being
#' attached.
#'
#' @param team Team name(s); each must be an element of `teams`.
#' @param start,end First and last day number (inclusive) to include.
#' @param opp_flag 1 collects the opponent's totals in each game; any other
#'   value (callers in this file pass 0 and 2) collects the team's own.
#' @return The named vector of column means (first entry `GM`), or
#'   invisible `NULL` if no rows were found.
get_team_stats <- function(team, start = 1, end = 200, opp_flag=0) {
  player <- "TEAM TOTALS"
  use_opponent <- opp_flag == 1
  rows <- list()
  for (tm in team) {
    # The team's position in `teams` is the roster number used in the URL.
    team_no <- match(tm, teams)
    if (is.na(team_no)) {
      stop("Unknown team: ", tm, call. = FALSE)
    }
    sched_url <- paste0(path, "/rosters/roster", team_no, "sched.htm")
    links <- html_attr(html_nodes(read_html(sched_url), "a"), "href")
    # The first nine links and the last one are skipped, as in the original
    # script (presumably page navigation -- confirm against the site).
    if (length(links) <= 10) {
      next
    }
    links <- links[10:(length(links) - 1)]
    for (link in links) {
      if (is.na(link)) {
        next
      }
      # Drop the first two characters of the href before appending to `path`.
      box_url <- paste0(path, substr(link, 3, nchar(link)))
      # The day number is the text between "boxes/" and the following "-".
      m <- regmatches(box_url, regexec("boxes/([^-]*)-", box_url))[[1]]
      if (length(m) < 2) {
        next
      }
      day <- suppressWarnings(as.numeric(m[2]))
      # Links that are not box scores yield NA and are skipped rather than
      # aborting the whole run.
      if (is.na(day) || day < start || day > end) {
        next
      }
      tables <- html_table(read_html(box_url), header = TRUE, fill = TRUE)
      if (length(tables) < 3) {
        next
      }
      # Tables 2 and 3 hold the two teams; a table's first column header is
      # the team name. The opponent is whichever of the two is not `tm`.
      own_idx <- if (isTRUE(colnames(tables[[2]])[1] == tm)) 2 else 3
      box_idx <- if (use_opponent) 5 - own_idx else own_idx
      box <- tables[[box_idx]]
      # The header check only applies to the team's own table; the
      # opponent's table is taken as-is.
      if (!use_opponent && !isTRUE(colnames(box)[1] == tm)) {
        next
      }
      # Drop the last row and the last column. The original wrote
      # `1:(nrow(box))-1`, which is `(1:n) - 1` and selected the same rows
      # only because index 0 is ignored.
      box_team <- box[seq_len(max(nrow(box) - 1, 0)), seq_len(ncol(box) - 1)]
      box_team[, 3:16] <- lapply(box_team[, 3:16], as.numeric)
      colnames(box_team)[6:7] <- c("X3P", "X3PA")
      hit <- which(box_team[[1]] == player)
      if (length(hit) > 0) {
        colnames(box_team)[1] <- if (use_opponent) "Team Opponent" else "Team"
        rows[[length(rows) + 1]] <- box_team[hit, ]
      }
    }
  }
  # rbind() over an empty list gives NULL, which is the "nothing found" case.
  stats <- do.call(rbind, rows)
  if (is.null(stats) || nrow(stats) == 0) {
    cat("No stats found.\n")
    return(invisible(NULL))
  }
  stats <- cbind.data.frame(GM = nrow(stats), stats[, 3:16])
  cat("AVERAGES\n")
  averages <- colMeans(stats)
  print(round(averages, digits = 1))
  cat("PERCENTAGES\n")
  print(c(sum(stats$FG) / sum(stats$FGA), sum(stats$X3P) / sum(stats$X3PA),
          sum(stats$FT) / sum(stats$FTA)))
  cat("Totals\n")
  cat("points boards steals blocks\n")
  # Points = 2 * FG + 3P made + FT made.
  print(c(sum(stats$FG) * 2 + sum(stats$X3P) + sum(stats$FT),
          sum(stats$REB), sum(stats$ST), sum(stats$BL)))
  averages
}
#' Print a team's own stats followed by its opponents' stats
#'
#' Calls `get_team_stats()` twice: once with `opp_flag = 0` under an
#' "OFFENSE" heading and once with `opp_flag = 1` under a "DEFENSE" heading.
#'
#' @param team Team name, passed through to `get_team_stats()`.
#' @param start,end Day range, passed through to `get_team_stats()`.
#' @return Whatever the second (`opp_flag = 1`) `get_team_stats()` call
#'   returns.
get_team_offdef_stats <- function(team, start = 1, end = 200) {
  cat("OFFENSE\n")
  get_team_stats(team, start = start, end = end, opp_flag = 0)

  cat("\nDEFENSE\n")
  get_team_stats(team, start = start, end = end, opp_flag = 1)
}
#' Tabulate every team's own per-game averages
#'
#' Calls `get_team_stats()` for each entry of the file-level `teams` vector
#' and stacks the returned averages into one data frame with an added `team`
#' column. Teams for which `get_team_stats()` returns nothing are skipped;
#' previously a missing result for the first team aborted the whole run.
#'
#' @param start,end Day range, passed through to `get_team_stats()`.
#' @return Invisibly (via `print()`), the data frame of averages with one
#'   row per team that had stats, or `NULL` if none had.
get_all_team_off_stats <- function(start = 1, end = 200) {
  averages <- NULL
  for (team in teams) {
    # opp_flag = 0 selects the team's own totals; the original passed 2,
    # which get_team_stats() treats the same way (it only tests for 1).
    team_avg <- get_team_stats(team, start, end, 0)
    if (is.null(team_avg)) {
      next
    }
    row <- as.data.frame.list(team_avg)
    if (nrow(row) == 0) {
      next
    }
    row["team"] <- team
    if (is.null(averages)) {
      averages <- row
      # The original printed the table once after its first row; kept so the
      # console output is unchanged.
      print(averages)
    } else {
      averages[nrow(averages) + 1, ] <- row
    }
  }
  print(averages)
}
#' Tabulate every team's opponents' per-game averages
#'
#' Calls `get_team_stats()` with `opp_flag = 1` for each entry of the
#' file-level `teams` vector and stacks the returned averages into one data
#' frame with an added `team` column. Teams for which `get_team_stats()`
#' returns nothing are skipped; previously a missing result for the first
#' team aborted the whole run.
#'
#' @param start,end Day range, passed through to `get_team_stats()`.
#' @return Invisibly (via `print()`), the data frame of averages with one
#'   row per team that had stats, or `NULL` if none had.
get_all_team_def_stats <- function(start = 1, end = 200) {
  averages <- NULL
  for (team in teams) {
    team_avg <- get_team_stats(team, start, end, 1)
    if (is.null(team_avg)) {
      next
    }
    row <- as.data.frame.list(team_avg)
    if (nrow(row) == 0) {
      next
    }
    row["team"] <- team
    if (is.null(averages)) {
      averages <- row
      # The original printed the table once after its first row; kept so the
      # console output is unchanged.
      print(averages)
    } else {
      averages[nrow(averages) + 1, ] <- row
    }
  }
  print(averages)
}
Then paste this at the ">" prompt:
# Example: stats for the player "LeBron James" on the "Jazz" for days 36 to 45.
get_player_stats("LeBron James", "Jazz", start = 36, end = 45)
Substitute the player name and team, with day numbers for start and end. In my testing, ranges much over 10 days time out, because web servers close connections after a certain amount of time.
An example for a whole team:
#' Print stats for a fixed Trail Blazers roster, then the team's stats
#'
#' For each hard-coded player name, prints the name and calls
#' `get_player_stats()`; afterwards prints the team name and calls
#' `get_team_stats()` for the team's own totals ("OFFENSE") and for its
#' opponents' totals ("DEFENSE").
#'
#' @param start,end Day range, passed through to the stats functions.
#' @param per36 Passed through to `get_player_stats()`; defaults to 1 here.
#' @return Whatever the final (`opp_flag = 1`) `get_team_stats()` call
#'   returns.
Manas_stats <- function(start = 1, end = 200, per36 = 1) {
  roster_team <- "Trail Blazers"
  roster <- c(
    "Robert Parish", "Paul Griffin", "George McGinnis", "E.C. Coleman",
    "Darryl Minniefield", "Ira Terrell", "David Vaughn", "Larry Kenon",
    "Skeeter Swift", "Brian Winters", "Larry Steele", "Rod Thorn",
    "Mike Dunleavy", "Andre McCarter"
  )

  # One report per player, each followed by two blank lines.
  for (k in seq_along(roster)) {
    current <- roster[k]
    print(current)
    get_player_stats(current, roster_team, start, end, per36)
    cat("\n")
    cat("\n")
  }

  # Team-level summary: own totals first, then opponents' totals.
  print(roster_team)
  cat("OFFENSE\n")
  get_team_stats(roster_team, start, end, 0)
  cat("\nDEFENSE\n")
  get_team_stats(roster_team, start, end, 1)
}