Course Overview
[1] "game_id" "season"
[3] "season_type" "game_date"
[5] "game_date_time" "athlete_id"
[7] "athlete_display_name" "team_id"
[9] "team_name" "team_location"
[11] "team_short_display_name" "minutes"
[13] "field_goals_made" "field_goals_attempted"
[15] "three_point_field_goals_made" "three_point_field_goals_attempted"
[17] "free_throws_made" "free_throws_attempted"
[19] "offensive_rebounds" "defensive_rebounds"
[21] "rebounds" "assists"
[23] "steals" "blocks"
[25] "turnovers" "fouls"
[27] "plus_minus" "points"
[29] "starter" "ejected"
[31] "did_not_play" "active"
[33] "athlete_jersey" "athlete_short_name"
[35] "athlete_headshot_href" "athlete_position_name"
[37] "athlete_position_abbreviation" "team_display_name"
[39] "team_uid" "team_slug"
[41] "team_logo" "team_abbreviation"
[43] "team_color" "team_alternate_color"
[45] "home_away" "team_winner"
[47] "team_score" "opponent_team_id"
[49] "opponent_team_name" "opponent_team_location"
[51] "opponent_team_display_name" "opponent_team_abbreviation"
[53] "opponent_team_logo" "opponent_team_color"
[55] "opponent_team_alternate_color" "opponent_team_score"
[57] "reason"
# Show each player's made and attempted field goals for the rows of
# `raw_box` dated 2011-06-12.
raw_box |>
  dplyr::filter(game_date == "2011-06-12") |>
  dplyr::select(
    athlete_display_name,
    field_goals_made,
    field_goals_attempted
  )
# A tibble: 30 × 3
athlete_display_name field_goals_made field_goals_attempted
<chr> <int> <int>
1 Dirk Nowitzki 9 27
2 Tyson Chandler 2 4
3 Jason Kidd 2 4
4 Shawn Marion 4 10
5 J.J. Barea 7 12
6 Brian Cardinal 1 1
7 Caron Butler NA NA
8 Ian Mahinmi 2 3
9 Rodrigue Beaubois NA NA
10 DeShawn Stevenson 3 5
# ℹ 20 more rows
# Dates used further down to exclude games from the regular-season table.
# NOTE(review): presumably one All-Star game date per season, 2002-2025;
# verify against the league schedule.
allstar_dates <- c(
  "2002-02-10", "2003-02-09", "2004-02-15", "2005-02-20",
  "2006-02-19", "2007-02-18", "2008-02-17", "2009-02-15",
  "2010-02-14", "2011-02-20", "2012-02-26", "2013-02-17",
  "2014-02-16", "2015-02-15", "2016-02-14", "2017-02-19",
  "2018-02-18", "2019-02-17", "2020-02-16", "2021-03-07",
  "2022-02-20", "2023-02-19", "2024-02-18", "2025-02-16"
) |>
  lubridate::date()
# Step 1: keep only rows with season_type == 2 where the player did play,
# and drop every game whose date appears in `allstar_dates`.
# NOTE(review): season_type == 2 is presumably the regular season -- confirm
# against the data dictionary.
reg_box <- raw_box |>
  dplyr::filter(
    season_type == 2,
    !did_not_play,
    !game_date %in% allstar_dates
  )

# Step 2: give the player and shooting columns short names, set the shooting
# counts to 0 on rows with no recorded minutes, and finally record those
# missing minutes as 0 too.
reg_box <- reg_box |>
  dplyr::rename(
    Player = athlete_display_name,
    FGM = field_goals_made,
    FGA = field_goals_attempted,
    TPM = three_point_field_goals_made,
    TPA = three_point_field_goals_attempted,
    FTM = free_throws_made,
    FTA = free_throws_attempted
  ) |>
  dplyr::mutate(
    # `minutes` is still NA at this point; replace_na() below fills it in,
    # so the order of these two steps matters.
    dplyr::across(
      c(FGM, FGA, TPM, TPA, FTM, FTA),
      \(x) ifelse(is.na(minutes), 0, x)
    )
  ) |>
  tidyr::replace_na(list(minutes = 0))
# A tibble: 18 × 10
Player season FGM FGA TPM TPA FTM FTA minutes n_games
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Dirk Nowitzki 2002 600 1258 139 350 440 516 2891 76
2 Dirk Nowitzki 2003 690 1489 148 390 483 548 3117 80
3 Dirk Nowitzki 2004 605 1310 99 290 371 423 2915 77
4 Dirk Nowitzki 2005 663 1445 91 228 615 708 3020 78
5 Dirk Nowitzki 2006 751 1564 110 271 539 598 3086 81
6 Dirk Nowitzki 2007 673 1341 72 173 498 551 2819 80
7 Dirk Nowitzki 2008 630 1314 79 220 478 544 2769 81
8 Dirk Nowitzki 2009 774 1616 61 170 485 545 3051 82
9 Dirk Nowitzki 2010 720 1496 51 121 536 586 3041 82
10 Dirk Nowitzki 2011 610 1179 66 168 395 443 2505 82
11 Dirk Nowitzki 2012 473 1034 78 212 318 355 2078 66
12 Dirk Nowitzki 2013 332 707 63 151 164 191 1628 52
13 Dirk Nowitzki 2014 633 1273 131 329 338 376 2625 80
14 Dirk Nowitzki 2015 487 1062 104 274 255 289 2285 77
15 Dirk Nowitzki 2016 498 1112 126 342 250 280 2362 75
16 Dirk Nowitzki 2017 296 678 79 209 98 112 1421 54
17 Dirk Nowitzki 2018 346 758 138 337 97 108 1901 77
18 Dirk Nowitzki 2019 135 376 64 205 39 50 794 51
# Top five rows of `season_box` ranked by made field goals (FGM), largest
# first, keeping only the columns needed to read the ranking.
season_box |>
  dplyr::select(Player, season, FGM, FGA, minutes, n_games) |>
  dplyr::arrange(dplyr::desc(FGM)) |>
  dplyr::slice_head(n = 5)
# A tibble: 5 × 6
Player season FGM FGA minutes n_games
<chr> <int> <dbl> <dbl> <dbl> <int>
1 Kobe Bryant 2006 949 2109 3184 78
2 LeBron James 2006 875 1823 3361 82
3 Kobe Bryant 2003 868 1924 3401 82
4 Shai Gilgeous-Alexander 2025 868 1680 2633 77
5 LeBron James 2018 857 1580 3024 82
# Top five rows of `season_box` ranked by FGP, largest first, with no
# minimum on the number of attempts (FGA).
season_box |>
  dplyr::select(Player, season, FGP, FGA) |>
  dplyr::arrange(dplyr::desc(FGP)) |>
  dplyr::slice_head(n = 5)
# A tibble: 5 × 4
Player season FGP FGA
<chr> <int> <dbl> <dbl>
1 Ahmad Caver 2022 1 1
2 Alondes Williams 2025 1 2
3 Andris Biedrins 2014 1 1
4 Anthony Brown 2018 1 1
5 Braxton Key 2023 1 1
# Same FGP ranking, restricted to rows with at least 400 attempts (FGA) so
# that tiny-sample rows cannot reach the top.
season_box |>
  dplyr::filter(FGA >= 400) |>
  dplyr::select(Player, season, FGP, FGA) |>
  dplyr::arrange(dplyr::desc(FGP)) |>
  dplyr::slice_head(n = 5)
# A tibble: 5 × 4
Player season FGP FGA
<chr> <int> <dbl> <dbl>
1 Daniel Gafford 2024 0.725 480
2 Walker Kessler 2023 0.720 414
3 DeAndre Jordan 2017 0.714 577
4 Rudy Gobert 2022 0.713 508
5 DeAndre Jordan 2015 0.710 534
# A tibble: 5 × 6
Player season eFGP FGP TPA n_games
<chr> <int> <dbl> <dbl> <dbl> <int>
1 Daniel Gafford 2024 0.725 0.725 0 74
2 Walker Kessler 2023 0.721 0.720 3 74
3 DeAndre Jordan 2017 0.714 0.714 2 81
4 Rudy Gobert 2022 0.713 0.713 4 66
5 DeAndre Jordan 2015 0.711 0.710 4 82
# A tibble: 5 × 6
Player season eFGP FGP TPA n_games
<chr> <int> <dbl> <dbl> <dbl> <int>
1 Kyle Korver 2015 0.671 0.487 449 75
2 Duncan Robinson 2020 0.667 0.470 606 73
3 Obi Toppin 2024 0.660 0.571 260 83
4 Nikola Jokic 2023 0.660 0.632 149 69
5 Joe Harris 2021 0.655 0.502 427 69
“what is the probability that a player makes a shot”
I highly recommend using Quarto or RMarkdown
Analyze tracking data
NFL’s Big Data Bowl is a great opportunity
I highly encourage turning Project 3 into a BDB submission
Respect diverse backgrounds
Don’t hesitate to ask for and to provide help!
Take care of yourself & each other
Peer Review
Respect your classmates enough to review their projects yourself. Uploading someone else’s project report or presentation to a generative AI tool (e.g., for creating summaries) is forbidden and will result in a failing grade.