Run Expectancy
annual_statcast_query()
annual_statcast_query() loops over weeks in a year and picks up games from
games (game_pk), at-bats w/in games (at_bat_number), & pitches w/in at-bats (pitch_number)
inning and inning_topbot
balls, strikes, outs_when_up
batter, on_1b, on_2b, on_3b: offensive player IDs
pitcher, fielder_2, …, fielder_9: defensive player IDs
type: Ball, Strike, contact (X)
description: pitch-level outcome type
description B S X
ball 231032 0 0
blocked_ball 14717 0 0
bunt_foul_tip 0 15 0
called_strike 0 113912 0
foul 0 126012 0
foul_bunt 0 1208 0
foul_tip 0 7218 0
hit_by_pitch 1979 0 0
hit_into_play 0 7 121744
missed_bunt 0 196 0
pitchout 52 0 0
swinging_strike 0 73208 1
swinging_strike_blocked 0 3834 0
# A tibble: 20 × 6
at_bat_number pitch_number balls strikes type des
<int> <int> <int> <int> <chr> <chr>
1 1 1 0 0 B Mookie Betts walks.
2 1 2 1 0 S Mookie Betts walks.
3 1 3 1 1 B Mookie Betts walks.
4 1 4 2 1 B Mookie Betts walks.
5 2 1 0 0 B Shohei Ohtani grounds into a …
6 2 2 1 0 S Shohei Ohtani grounds into a …
7 2 3 1 1 B Shohei Ohtani grounds into a …
8 2 4 2 1 X Shohei Ohtani grounds into a …
9 3 1 0 0 S Freddie Freeman called out on…
10 3 2 0 1 S Freddie Freeman called out on…
11 3 3 0 2 B Freddie Freeman called out on…
12 3 4 1 2 S Freddie Freeman called out on…
13 4 1 0 0 B Will Smith flies out to left …
14 4 2 1 0 X Will Smith flies out to left …
15 5 1 0 0 B Xander Bogaerts flies out to …
16 5 2 1 0 S Xander Bogaerts flies out to …
17 5 3 1 1 B Xander Bogaerts flies out to …
18 5 4 2 1 S Xander Bogaerts flies out to …
19 5 5 2 2 B Xander Bogaerts flies out to …
20 5 6 3 2 X Xander Bogaerts flies out to …
# A tibble: 20 × 6
batter outs_when_up on_1b on_2b on_3b des
<int> <int> <dbl> <dbl> <dbl> <chr>
1 605141 0 NA NA NA Mookie Betts walks.
2 605141 0 NA NA NA Mookie Betts walks.
3 605141 0 NA NA NA Mookie Betts walks.
4 605141 0 NA NA NA Mookie Betts walks.
5 660271 0 605141 NA NA Shohei Ohtani grounds into a force ou…
6 660271 0 605141 NA NA Shohei Ohtani grounds into a force ou…
7 660271 0 605141 NA NA Shohei Ohtani grounds into a force ou…
8 660271 0 605141 NA NA Shohei Ohtani grounds into a force ou…
9 518692 1 660271 NA NA Freddie Freeman called out on strikes.
10 518692 1 660271 NA NA Freddie Freeman called out on strikes.
11 518692 1 660271 NA NA Freddie Freeman called out on strikes.
12 518692 1 660271 NA NA Freddie Freeman called out on strikes.
13 669257 2 660271 NA NA Will Smith flies out to left fielder …
14 669257 2 660271 NA NA Will Smith flies out to left fielder …
15 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
16 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
17 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
18 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
19 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
20 593428 0 NA NA NA Xander Bogaerts flies out to right fi…
# Collect every MLBAM player id that appears anywhere in the 2024 pitch data:
# batter, pitcher, the three base-runner slots, and fielders 2 through 9.
# The fielder column names are generated rather than typed out, which removes
# the accidental double listing of fielder_3 (harmless under unique(), but a
# sign that a neighbouring column could just as easily have been skipped).
# Empty bases are recorded as NA, so the result can still contain NA.
player2024_id <-
  unique(
    unlist(
      lapply(
        c("batter", "pitcher",
          "on_1b", "on_2b", "on_3b",
          paste0("fielder_", 2:9)),
        \(id_col) statcast2024[[id_col]])))
# Look-up table for the 2024 players: keep the Chadwick register rows whose
# MLBAM key is present and appears in player2024_id, then add two display
# names -- FullName ("first last") and Name (FullName transliterated to
# plain ASCII so accented names can be matched or typed easily).
player2024_lookup <-
  chadwick_players |>
  dplyr::filter(
    !is.na(key_mlbam),
    key_mlbam %in% player2024_id) |>
  dplyr::mutate(FullName = paste(name_first, name_last)) |>
  dplyr::mutate(Name = stringi::stri_trans_general(FullName, "Latin-ASCII"))
save(player2024_lookup, file = "player2024_lookup.RData")
baseballr::mlb_batting_orders(): retrieves batting order for every game
# A tibble: 18 × 8
id fullName abbreviation batting_order batting_position_num team
<int> <chr> <chr> <chr> <chr> <chr>
1 605141 Mookie Betts SS 1 0 away
2 660271 Shohei Ohtani DH 2 0 away
3 518692 Freddie Freeman 1B 3 0 away
4 669257 Will Smith C 4 0 away
5 571970 Max Muncy 3B 5 0 away
6 606192 Teoscar Hernánd… RF 6 0 away
7 681546 James Outman CF 7 0 away
8 518792 Jason Heyward RF 8 0 away
9 666158 Gavin Lux 2B 9 0 away
10 593428 Xander Bogaerts 2B 1 0 home
11 665487 Fernando Tatis … RF 2 0 home
12 630105 Jake Cronenworth 1B 3 0 home
13 592518 Manny Machado DH 4 0 home
14 673490 Ha-Seong Kim SS 5 0 home
15 595777 Jurickson Profar LF 6 0 home
16 669134 Luis Campusano C 7 0 home
17 642180 Tyler Wade 3B 8 0 home
18 701538 Jackson Merrill CF 9 0 home
# ℹ 2 more variables: teamName <chr>, teamID <int>
# Download the starting lineup of every game in statcast2024 and stack the
# results into one table.

# get_lineup() is defined elsewhere. Wrapping it in possibly() makes a failed
# request return NULL instead of aborting the whole scrape; bind_rows() below
# ignores NULL entries.
poss_get_lineup <- purrr::possibly(.f = get_lineup, otherwise = NULL)

unik_game_pk <- unique(statcast2024$game_pk)

# Work through the games in blocks of 500. Each block ends one position
# before the next block starts, so no game is requested twice, and the last
# block ends at the final game.
block_starts <- seq(1, length(unik_game_pk), by = 500)
block_ends <- c(block_starts[-1] - 1, length(unik_game_pk))

all_lineups <- list()
# Iterate over however many blocks there actually are rather than assuming
# exactly five: a hard-coded 1:5 drops games beyond the 2500th and fails with
# an NA index when there are fewer than five blocks.
for (b in seq_along(block_starts)) {
  tmp <-
    purrr::map(
      .x = unik_game_pk[block_starts[b]:block_ends[b]],
      .f = poss_get_lineup,
      .progress = TRUE)
  all_lineups <- c(all_lineups, tmp)
}

# One row per player per game; unique() removes any exact duplicate rows.
lineups2024 <-
  dplyr::bind_rows(all_lineups) |>
  unique()
save(lineups2024, file = "lineups2024.RData")
on_1b, on_2b, and on_3b: tell us who is on base
101 for runners on 1st and 3rd
outs_when_up
statcast2024 containing \(R_{i,a}\) values
bat_score: batting team score before the pitch is thrown
post_bat_score: batting team score after pitch is thrown
               [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
bat_score 1 1 1 1 1 1 1 1 1 1 1 1
post_bat_score 1 1 1 1 1 1 1 1 1 1 1 1
[,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
bat_score 1 1 2 3 3 3 4 5 5 5
post_bat_score 1 2 3 3 3 4 5 5 5 5
[,23] [,24] [,25]
bat_score 5 5 5
post_bat_score 5 5 5
[1] "Enrique Hernández out on a sacrifice fly to left fielder José Azocar. Max Muncy scores."
[2] "Gavin Lux reaches on a fielder's choice, fielded by first baseman Jake Cronenworth. Teoscar Hernández scores. James Outman to 2nd. Fielding error by first baseman Jake Cronenworth."
[3] "Mookie Betts singles on a ground ball to left fielder José Azocar. James Outman scores. Gavin Lux to 2nd."
[4] "Shohei Ohtani singles on a line drive to left fielder José Azocar. Gavin Lux scores. Mookie Betts to 2nd."
group_by(game_pk, inning_number, inning_topbot)
post_bat_score w/in half-inning
dplyr::last(post_bat_score) - bat_score
Outs and BaseRunner and average RunsRemaining
# A tibble: 8 × 4
BaseRunner `Outs: 0` `Outs: 1` `Outs: 2`
<chr> <dbl> <dbl> <dbl>
1 000 0.488 0.262 0.0980
2 001 1.43 0.972 0.352
3 010 1.07 0.672 0.347
4 011 2.03 1.44 0.612
5 100 0.897 0.529 0.228
6 101 1.90 1.22 0.502
7 110 1.49 0.926 0.449
8 111 2.31 1.58 0.815
Outs=2, BaseRunner = '000': \(\rho \approx 0.1\)
Outs=2, BaseRunner = '100': \(\rho \approx 0.23\)
Outs and BaseRunner)
bat_score from last post_bat_score in each at-bat
dplyr::lead() to get next value (next_Outs, next_BaseRunner)
next_Outs and next_BaseRunner gives at-bat’s ending state
dplyr::lead() produces NA’s at the end of half-inning
end_expected_runs <-
# Copy of the run-expectancy table with its three columns prefixed by "end_"
# (end_Outs, end_BaseRunner, end_rho) so it can be joined on an at-bat's
# ending state without clashing with the starting-state columns.
expected_runs |>
  dplyr::rename_with(
    .fn = \(col_name) paste0("end_", col_name),
    .cols = dplyr::all_of(c("Outs", "BaseRunner", "rho")))
# Score every at-bat: attach the run expectancy of its starting state (rho)
# and of its ending state (end_rho), then define
#   RunValue = runs scored during the at-bat + end_rho - rho.
runValue2024 <-
runValue2024 |>
# starting state, matched on Outs and BaseRunner
dplyr::left_join(y = expected_runs, by = c("Outs", "BaseRunner")) |>
# ending state, matched on end_Outs and end_BaseRunner
dplyr::left_join(y = end_expected_runs, by = c("end_Outs", "end_BaseRunner")) |>
# left joins leave rho / end_rho as NA for any state missing from the table,
# which makes RunValue NA for that at-bat
dplyr::mutate(RunValue = RunsScored + end_rho - rho) |>
dplyr::select(game_pk, at_bat_number, RunValue)
joining using game_pk and at_bat_number
ohtani_id <-
# MLBAM id(s) of the lookup row(s) whose FullName is exactly "Shohei Ohtani".
# NOTE(review): the comparison below assumes exactly one match -- confirm the
# name is unique in player2024_lookup.
player2024_lookup |>
dplyr::filter(FullName == "Shohei Ohtani") |>
dplyr::pull(key_mlbam)
# Ohtani's at-bats in game 745444 with their run values. The first pitch of
# each at-bat stands in for the at-bat (one row apiece); run values are
# matched on game_pk and at_bat_number, and only the inning, the run value
# and the play description are kept.
ohtani_ab <-
  statcast2024 |>
  dplyr::filter(
    game_pk == 745444,
    pitch_number == 1 & batter == ohtani_id) |>
  dplyr::select(game_pk, at_bat_number, inning, des) |>
  dplyr::inner_join(
    y = runValue2024,
    by = c("game_pk", "at_bat_number")) |>
  dplyr::select(inning, RunValue, des)
ohtani_ab
# A tibble: 5 × 3
inning RunValue des
<int> <dbl> <chr>
1 1 -0.367 Shohei Ohtani grounds into a force out, shortstop Ha-Seong Ki…
2 3 0.130 Shohei Ohtani singles on a sharp line drive to right fielder …
3 5 -0.367 Shohei Ohtani grounds into a force out, third baseman Tyler W…
4 7 -0.164 Shohei Ohtani grounds out softly, pitcher Wandy Peralta to fi…
5 8 1 Shohei Ohtani singles on a line drive to left fielder José Az…
Use batter in statcast2024 to look up player name in player2024_lookup
player2024_lookup doesn’t have column batter
Rename key_mlbam to batter
Sum RunValue across all at-bats (RE24)
re24 <-
# RE24 per batter: total RunValue over all of a batter's at-bats, plus the
# number of at-bats (N) that went into the total.
statcast2024 |>
# keep the first pitch of each at-bat, so each at-bat contributes one row
dplyr::filter(pitch_number == 1) |>
dplyr::select(game_pk, at_bat_number, batter) |>
# attach each at-bat's run value; at-bats without one are dropped
dplyr::inner_join(y = runValue2024, by = c("game_pk", "at_bat_number")) |>
dplyr::group_by(batter) |>
# sum() has no na.rm, so a single NA RunValue makes that batter's RE24 NA
dplyr::summarise(RE24 = sum(RunValue),N = dplyr::n()) |>
# tmp_lookup is defined elsewhere; presumably player2024_lookup with
# key_mlbam renamed to batter -- TODO confirm
dplyr::inner_join(y = tmp_lookup, by = "batter") |>
dplyr::select(Name, RE24, N)
# A tibble: 10 × 3
Name RE24 N
<chr> <dbl> <int>
1 Aaron Judge 89.0 675
2 Juan Soto 74.4 693
3 Shohei Ohtani 73.1 708
4 Bobby Witt 65.9 694
5 Brent Rooker 47.9 599
6 Vladimir Guerrero 45.0 671
7 Ketel Marte 41.2 562
8 Kyle Schwarber 40.9 672
9 Joc Pederson 39.2 433
10 Jose Ramirez 39.1 657
BaseRunner: '100' \(\rightarrow\) '101')