Intergenerational Links:  NLSY79 Mothers and Their Children

Stata Code for Part A.

*Part A: attach each mother's NLSY79 record to the records of her children;
*every statement below ends in a semicolon, so the do-file delimiter is switched first;
#delimit ;

*child file: add the merge key momid and sort on it;
use child, clear;
*copy the mother id (C0000200) into momid, the key name shared with the mom dataset;
*child.dta is saved over itself below, so drop any momid left by an earlier run;
capture drop momid;
gen momid = C0000200;
sort momid;
save child, replace;

*mom file: build the same key from the NLSY79 respondent id (R0000100);
use mom, clear;
capture drop momid;
gen momid = R0000100;
sort momid;
save mom, replace;

*mom is already the dataset in memory, so child is the only file to merge in;
*NOTE(review): this is the pre-Stata 11 merge syntax -- the modern equivalent would be merge 1:m momid using child -- confirm target version;
merge momid using child;
*drop NLSY79 respondents with no match in the child file -- C0000100 (presumably the child id) is missing for them after the merge;
*final data set has 11469 observations using data through 2006;
drop if missing(C0000100);

Stata Code for Part B.

*Part B: mother's age and survey year at her last interview, and a teen-birth indicator;
*variables for the mother are named with a leading "m";
gen m_age_lint = .;
gen m_year_lint = .;

*survey years are processed in chronological order -- each year with a valid (positive) age overwrites the previous one, so the final values come from the last interview;
*Stata ranks missing above every number, so "X > 0" alone is also true when X is missing -- the !missing() guards keep such rows from being stamped with a year;
replace m_age_lint = R0898310 if R0898310 > 0 & !missing(R0898310);
replace m_year_lint = 1982 if R0898310 > 0 & !missing(R0898310);
*repeat for all intervening years;

replace m_age_lint = T0989000 if T0989000 > 0 & !missing(T0989000);
replace m_year_lint = 2006 if T0989000 > 0 & !missing(T0989000);

*teen-birth indicator, defined only for women aged 18 or over at their last interview;
*for each survey year, age at first birth is read from the variable reported in that year and applied only to women whose last interview was that year;
gen m_teenbirth = .;
*1982 -- age at 1st birth is positive and below 18;
replace m_teenbirth = 1 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 1982 & R0898840 > 0 & R0898840 < 18;
*1982 -- age at 1st birth is 18 or greater, so no teen birth (a missing age at 1st birth is left undefined);
replace m_teenbirth = 0 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 1982 & R0898840 >= 18 & !missing(R0898840);
*1982 -- code -998 means never gave birth, so no teen birth;
replace m_teenbirth = 0 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 1982 & R0898840 == -998;
*repeat for each year -- this strategy lets us define these variables using data reported at the last interview;

*2006 -- same three rules, using the 2006 age at 1st birth;
replace m_teenbirth = 1 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 2006 & T0996200 > 0 & T0996200 < 18;
replace m_teenbirth = 0 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 2006 & T0996200 >= 18 & !missing(T0996200);
replace m_teenbirth = 0 if m_age_lint >= 18 & !missing(m_age_lint) & m_year_lint == 2006 & T0996200 == -998;

*using data through 2006;
*m_teenbirth (mean = .201, N = 11463);
*m_year_lint (mean =2002, N = 11465);
*m_age_lint (mean =41.4, N = 11465);

Stata Code for Part C.

*Part C: restrict to female young adults and build their last-interview and teen-birth variables;
*keeping Y0677400 == 2 (female) drops 8322 observations using data through 2006;
*sample size is now 3147;
keep if Y0677400 == 2;

*age and year of last interview for the young adult;
*variables for the young adult are named with a leading "y";
gen y_year_lint = .;
gen y_age_lint = .;
*Y1205100 holds the year of last interview -- only positive values are taken;
replace y_year_lint = Y1205100 if Y1205100 > 0 & !missing(Y1205100);
*pick up the age reported in the survey year that matches the last interview;
replace y_age_lint = Y0342400 if y_year_lint == 1994;
*repeat for each year -- this strategy lets us define these variables using data reported at the last interview;

replace y_age_lint = Y1948500 if y_year_lint == 2006;

*teen-birth indicator for female young adults who are 18 or over;
*Stata ranks missing above every number, so ">= 18" alone would also match a missing age -- the !missing() guards keep the indicator undefined in that case;
gen y_teenbirth = .;
*age at 1st birth (Y1211100) is positive and below 18;
replace y_teenbirth = 1 if y_age_lint >= 18 & !missing(y_age_lint) & Y1211100 > 0 & Y1211100 < 18;
*age at 1st birth is 18 or greater, so no teen birth;
replace y_teenbirth = 0 if y_age_lint >= 18 & !missing(y_age_lint) & Y1211100 >= 18 & !missing(Y1211100);
*code -998 means never gave birth, so no teen birth;
replace y_teenbirth = 0 if y_age_lint >= 18 & !missing(y_age_lint) & Y1211100 == -998;

*Final Statistics from Program:  Data through 2006 survey;
*m_teenbirth (mean = .249, N = 3147);
*y_teenbirth (mean = .137, N = 2419) -- smaller sample size because only created for those at least 18;
*y_year_lint (mean = 2006, N = 3147);
*y_age_lint (mean = 21.3, N = 3147);
*m_year_lint (mean = 2005, N = 3147);
*m_age_lint (mean = 44.6, N = 3147);