Births R Documentation

US Births

Description

Number of births in the United States. Several data sets are provided; they cover different date ranges and are drawn from different sources.

Usage

data(Births)

data(Births78)

data(Births2015)

data(BirthsSSA)

data(BirthsCDC)

Format

A data.frame with the following 8 variables.

date

Date

births

Number of births on date (integer)

wday

Day of week (ordered factor)

year

Year (integer)

month

Month (integer)

day_of_year

Day of year (integer)

day_of_month

Day of month (integer)

day_of_week

Day of week (integer)

Details

There are some overlapping dates in the various data sets, but the number of births does not always agree due to the different sources of the data. See the examples.

Source

See Also

Birthdays for a data set aggregated at the state level.

Examples

# Load every births data set used in the examples below.
data(Births78)
data(Births2015)
data(Births)
data(BirthsSSA)
data(BirthsCDC)

# Date ranges for the different data sets, returned as a list keyed by
# data set name.
lapply(
  list(
    Births = Births,
    Births78 = Births78,
    Births2015 = Births2015,
    BirthsSSA = BirthsSSA,
    BirthsCDC = BirthsCDC
  ),
  function(x) range(x$date)
)

# The same ranges, one data set at a time.
range(Births78$date)
range(Births2015$date)
range(Births$date)
range(BirthsSSA$date)
range(BirthsCDC$date)

# Births and Births78 have slightly different numbers of births

if (require(ggplot2)) {
  # Plots built inside `{ }` are only displayed when they are the value of
  # the whole expression, so each plot is printed explicitly.

  # Smoothed daily births over time, one line per day of the week.
  print(
    ggplot(data = Births, aes(x = date, y = births, colour = wday)) +
      stat_smooth(se = FALSE, alpha = 0.8, geom = "line")
  )

  # Births by day of year, with a smoothed line per day of the week.
  print(
    ggplot(data = Births, aes(x = day_of_year, y = births, colour = wday)) +
      geom_point(size = 0.4, alpha = 0.5) +
      stat_smooth(se = FALSE, geom = "line", alpha = 0.6, linewidth = 1.5)
  )

  if (require(dplyr)) {
    # Distribution of the day-by-day difference between the 1978 rows of
    # Births and Births78.
    # NOTE(review): bind_cols() pairs rows by position, so this assumes both
    # inputs have the same number of rows in the same date order.
    print(
      ggplot(
        data = bind_cols(
          Births |> filter(year == 1978),
          Births78 |> rename(births78 = births)
        ),
        aes(x = births - births78)
      ) +
        geom_histogram(binwidth = 1)
    )
  }
}

# SSA records more births than CDC
if (require(ggplot2) && require(dplyr)) {
  # Distribution of the day-by-day difference between the SSA and CDC counts.
  # The year filters presumably restrict both data sets to the span they
  # share; bind_cols() pairs rows by position, so both inputs must have the
  # same number of rows in the same date order.
  print(
    ggplot(
      data = bind_cols(
        BirthsSSA |> filter(year <= 2003) |> rename(SSA = births),
        BirthsCDC |> filter(year >= 2000) |> rename(CDC = births)
      ),
      aes(x = SSA - CDC)
    ) +
      geom_histogram(binwidth = 10)
  )
}