mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
Added to statistical inference section with scipy: hypothesis testing notebook.
This commit is contained in:
parent
85675abda6
commit
6a1a2aff7c
BIN
scipy/2002FemPreg.dat.gz
Normal file
BIN
scipy/2002FemPreg.dat.gz
Normal file
Binary file not shown.
245
scipy/2002FemPreg.dct
Normal file
245
scipy/2002FemPreg.dct
Normal file
|
@ -0,0 +1,245 @@
|
|||
infile dictionary {
|
||||
_column(1) str12 caseid %12s "RESPONDENT ID NUMBER"
|
||||
_column(13) byte pregordr %2f "PREGNANCY ORDER (NUMBER)"
|
||||
_column(15) byte howpreg_n %2f "BB-2 # OF WEEKS OR MONTHS CURRENTLY PREGNANT"
|
||||
_column(17) byte howpreg_p %1f "BB-2 CURRENT PREGNANCY LENGTH REPORTED IN MONTHS OR WEEKS"
|
||||
_column(18) byte moscurrp %1f "NUMBER OF MONTHS CURRENTLY PREGNANT"
|
||||
_column(19) byte nowprgdk %1f "BB-3 WHICH TRIMESTER -- CURRENT PREGNANCY"
|
||||
_column(20) byte pregend1 %1f "BC-1 HOW PREGNANCY ENDED - 1ST MENTION"
|
||||
_column(21) byte pregend2 %1f "BC-1 HOW PREGNANCY ENDED - 2ND MENTION"
|
||||
_column(22) byte nbrnaliv %1f "BC-2 NUMBER OF BABIES BORN ALIVE FROM THIS PREGNANCY"
|
||||
_column(23) byte multbrth %1f "BC-3 WAS THIS A MULTIPLE BIRTH"
|
||||
_column(24) int cmotpreg %4f "CM FOR PREGNANCY END DATE (IF NONLIVEBIRTH)"
|
||||
_column(28) byte prgoutcome %1f "OUTCOME OF PREGNANCY (BASED ON PRIORITY ORDERING)"
|
||||
_column(29) int cmprgend %4f "CM FOR PREGNANCY END DATE (REGARDLESS OF OUTCOME)"
|
||||
_column(33) byte flgdkmo1 %1f "FLAG INDICATING SEASON/DK/RF FOR BC-4A DATPRGEN_M"
|
||||
_column(34) int cmprgbeg %4f "CM FOR PREGNANCY START DATE"
|
||||
_column(38) byte ageatend %2f "BC-4B R'S AGE AT PREGNANCY'S END DATE"
|
||||
_column(40) byte hpageend %2f "BC-4C FATHER'S AGE AT PREGNANCY'S END DATE"
|
||||
_column(42) byte gestasun_m %2f "BC-5 GESTATIONAL LENGTH OF PREGNANCY IN MONTHS"
|
||||
_column(44) byte gestasun_w %2f "BC-5 GESTATIONAL LENGTH OF PREGNANCY IN WEEKS"
|
||||
_column(46) byte wksgest %2f "GESTATIONAL LENGTH OF COMPLETED PREGNANCY (IN WEEKS)"
|
||||
_column(48) byte mosgest %2f "GESTATIONAL LENGTH OF COMPLETED PREGNANCY (IN MONTHS)"
|
||||
_column(50) byte dk1gest %1f "BC-6 DK FOLLOWUP FOR GESTATIONAL LENGTH OF A STILLBIRTH"
|
||||
_column(51) byte dk2gest %1f "BC-7 DK FOLLOWUP FOR GESTATIONAL LENGTH OF A LIVEBIRTH"
|
||||
_column(52) byte dk3gest %1f "BC-8 DK FOLLOWUP FOR GESTATIONAL LENGTH OF A MISCARR/ABOR/ECTOP"
|
||||
_column(53) byte bpa_bdscheck1 %1f "WHETHER 1ST LIVEBORN BABY FROM THIS PREGNANCY WAS BPA OR BDS"
|
||||
_column(54) byte bpa_bdscheck2 %1f "WHETHER 2ND LIVEBORN BABY FROM THIS PREGNANCY WAS BPA OR BDS"
|
||||
_column(55) byte bpa_bdscheck3 %1f "WHETHER 3RD LIVEBORN BABY FROM THIS PREGNANCY WAS BPA OR BDS"
|
||||
_column(56) byte babysex %1f "BD-2 SEX OF 1ST LIVEBORN BABY FROM THIS PREGNANCY"
|
||||
_column(57) byte birthwgt_lb %2f "BD-3 BIRTHWEIGHT IN POUNDS - 1ST BABY FROM THIS PREGNANCY"
|
||||
_column(59) byte birthwgt_oz %2f "BD-3 BIRTHWEIGHT IN OUNCES - 1ST BABY FROM THIS PREGNANCY"
|
||||
_column(61) byte lobthwgt %1f "BD-4 IS BABY LOW BIRTHWEIGHT- 1ST BABY FROM THIS PREGNANCY"
|
||||
_column(62) byte babysex2 %1f "BD-2 SEX OF 2ND LIVEBORN BABY FROM THIS PREGNANCY"
|
||||
_column(63) byte birthwgt_lb2 %2f "BD-3 BIRTHWEIGHT IN POUNDS - 2ND BABY FROM THIS PREGNANCY"
|
||||
_column(65) byte birthwgt_oz2 %2f "BD-3 BIRTHWEIGHT IN OUNCES - 2ND BABY FROM THIS PREGNANCY"
|
||||
_column(67) byte lobthwgt2 %1f "BD-4 IS BABY LOW BIRTHWEIGHT- 2ND BABY FROM THIS PREGNANCY"
|
||||
_column(68) byte babysex3 %1f "BD-2 SEX OF 3RD LIVEBORN BABY FROM THIS PREGNANCY"
|
||||
_column(69) byte birthwgt_lb3 %2f "BD-3 BIRTHWEIGHT IN POUNDS - 3RD BABY FROM THIS PREGNANCY"
|
||||
_column(71) byte birthwgt_oz3 %2f "BD-3 BIRTHWEIGHT IN OUNCES - 3RD BABY FROM THIS PREGNANCY"
|
||||
_column(73) byte lobthwgt3 %1f "BD-4 IS BABY LOW BIRTHWEIGHT- 3RD BABY FROM THIS PREGNANCY"
|
||||
_column(74) int cmbabdob %4f "CM FOR BABY'S OR BABIES' DATE OF BIRTH (DELIVERY DATE)"
|
||||
_column(78) int kidage %4f "CURRENT AGE (IN MOS) OF R'S CHILD(REN) FROM THIS PREGNANCY"
|
||||
_column(82) byte hpagelb %2f "BD-6 FATHER'S AGE AT TIME OF CHILD(REN) S BIRTH"
|
||||
_column(84) byte birthplc %1f "BD-7 PLACE WHERE R GAVE BIRTH"
|
||||
_column(85) byte paybirth1 %1f "BD-8 PAYMENT FOR DELIVERY - 1ST MENTION"
|
||||
_column(86) byte paybirth2 %1f "BD-8 PAYMENT FOR DELIVERY - 2ND MENTION"
|
||||
_column(87) byte paybirth3 %1f "BD-8 PAYMENT FOR DELIVERY - 3RD MENTION"
|
||||
_column(88) byte knewpreg %2f "BE-1 WEEKS PREGNANT WHEN R LEARNED SHE WAS PREGNANT"
|
||||
_column(90) byte trimestr %1f "BE-2A DK FOLLOWUP FOR KNEWPREG WHEN GESTATION >= 6 MOS"
|
||||
_column(91) byte ltrimest %1f "BE-2B DK FOLLOWUP FOR KNEWPREG WHEN GESTATION < 6 MOS"
|
||||
_column(92) byte priorsmk %1f "BE-3 AMOUNT R'SMOKED IN 6 MOS BEFORE R KNEW SHE WAS PREGNANT"
|
||||
_column(93) byte postsmks %1f "BE-4 R'SMOKED AT ALL AFTER R KNEW SHE WAS PREGNANT"
|
||||
_column(94) byte npostsmk %1f "BE-5 AMOUNT R'SMOKED DURING PREGNANCY AFTER R KNEW SHE WAS PREG"
|
||||
_column(95) byte getprena %1f "BE-6 ANY PRENATAL CARE FOR THIS PREGNANCY"
|
||||
_column(96) byte bgnprena %2f "BE-7 WEEKS PREGNANT AT FIRST PRENATAL CARE VISIT"
|
||||
_column(98) byte pnctrim %1f "BE-8A DK FOLLOWUP FOR BGNPRENA WHEN GESTATION >= 6 MOS"
|
||||
_column(99) byte lpnctri %1f "BE-8B DK FOLLOWUP FOR BGNPRENA WHEN GESTATION < 6 MOS"
|
||||
_column(100) byte workpreg %1f "BF-1 R WORKED AT ALL DURING THIS PREGNANCY"
|
||||
_column(101) byte workborn %1f "BF-2 MATERNITY LEAVE TAKEN FOR THIS PREGNANCY"
|
||||
_column(102) byte didwork %1f "BF-3 WHY NO MATERNITY LEAVE WAS TAKEN FOR THIS PREGNANCY"
|
||||
_column(103) byte matweeks %2f "BF-4 WEEKS OF MATERNITY LEAVE TAKEN FOR THIS PREGNANCY"
|
||||
_column(105) byte weeksdk %1f "BF-5 DK FOLLOWUP - WAS MATERNITY LEAVE <=4 OR > 4 WEEKS"
|
||||
_column(106) byte matleave %2f "BF-6 WEEKS OF PAID MATERNITY LEAVE FOR THIS PREGNANCY"
|
||||
_column(108) byte matchfound %1f "CHECK ON WHETHER CHILD MATCHES BIO CHILD IN HH ROSTER - 1ST"
|
||||
_column(109) byte livehere %1f "BG-1 WHETHER CHILD LIVES WITH R - 1ST FROM THIS PREGNANCY"
|
||||
_column(110) byte alivenow %1f "BG-2 WHETHER CHILD IS STILL ALIVE - 1ST FROM THIS PREGNANCY"
|
||||
_column(111) int cmkidied %4f "CM FOR CHLD'S DATE OF DEATH - 1ST FROM THIS PREGNANCY"
|
||||
_column(115) int cmkidlft %4f "CM FOR DATE CHILD STOPPED LIVING W/R - 1ST FROM THIS PREGNANCY"
|
||||
_column(119) int lastage %3f "AGE (IN MOS) WHEN CHILD LAST LIVED W/R-1ST FROM THIS PREGNANCY"
|
||||
_column(122) byte wherenow %1f "BG-5 WHERE CHILD LIVES NOW - 1ST FROM THIS PREGNANCY"
|
||||
_column(123) byte legagree %1f "BG-6 LEGAL AGREEMENT FOR WHERE CHILD LIVES - 1ST FROM THIS PREG"
|
||||
_column(124) byte parenend %1f "BG-7 IS R STILL LEGAL MOTHER OF CHILD - 1ST FROM THIS PREGNANCY"
|
||||
_column(125) byte anynurse %1f "BH-1 WHETHER R BREASTFED THIS CHILD AT ALL - 1ST FROM THIS PREG"
|
||||
_column(126) byte fedsolid %1f "BH-2 HAS R BEGUN SUPPLEMENTATION FOR CHILD - 1ST FROM THIS PREG"
|
||||
_column(127) int frsteatd_n %3f "BH-3 AGE (MOS/WKS/DAY) WHEN 1ST SUPPLEMENTED - 1ST FROM THIS PREG"
|
||||
_column(130) byte frsteatd_p %1f "BH-3 UNITS (MOS/WKS/DAYS) FOR FRSTEATD_N - 1ST FROM THIS PREG"
|
||||
_column(131) int frsteatd %3f "AGE (IN MOS) WHEN 1ST SUPPLEMENTED - 1ST FROM THIS PREG"
|
||||
_column(134) byte quitnurs %1f "BH-4 HAS R'STOPPED BREASTFEEDING CHILD - 1ST FROM THIS PREG"
|
||||
_column(135) int ageqtnur_n %3f "BH-5 AGE (MOS/WKS/DAY) WHEN STOPPED BREASTFEEDING - 1ST FROM THIS PREG"
|
||||
_column(138) byte ageqtnur_p %1f "BH-5 UNITS (MOS/WKS/DAYS) FOR AGEQTNUR_N - 1ST FROM THIS PREG"
|
||||
_column(139) int ageqtnur %3f "AGE (IN MOS) WHEN R'STOPPED NURSING CHILD - 1ST FROM THIS PREG"
|
||||
_column(142) byte matchfound2 %1f "CHECK ON WHETHER CHILD MATCHES BIO CHILD IN HH ROSTER - 2ND"
|
||||
_column(143) byte livehere2 %1f "BG-1 WHETHER CHILD LIVES WITH R - 2ND FROM THIS PREGNANCY"
|
||||
_column(144) byte alivenow2 %1f "BG-2 WHETHER CHILD IS STILL ALIVE - 2ND FROM THIS PREGNANCY"
|
||||
_column(145) int cmkidied2 %4f "CM FOR CHLD'S DATE OF DEATH - 2ND FROM THIS PREGNANCY"
|
||||
_column(149) int cmkidlft2 %4f "CM FOR DATE CHILD STOPPED LIVING W/R - 2ND FROM THIS PREGNANCY"
|
||||
_column(153) int lastage2 %3f "AGE (IN MOS) WHEN CHILD LAST LIVED W/R - 2ND FROM THIS PREGNANCY"
|
||||
_column(156) byte wherenow2 %1f "BG-5 WHERE CHILD LIVES NOW - 2ND FROM THIS PREGNANCY"
|
||||
_column(157) byte legagree2 %1f "BG-6 LEGAL AGREEMENT FOR WHERE CHILD LIVES - 2ND FROM THIS PREG"
|
||||
_column(158) byte parenend2 %1f "BG-7 IS R STILL LEGAL MOTHER OF CHILD - 2ND FROM THIS PREGNANCY"
|
||||
_column(159) byte anynurse2 %1f "BH-1 WHETHER R BREASTFED THIS CHILD AT ALL - 2ND FROM THIS PREG"
|
||||
_column(160) byte fedsolid2 %1f "BH-2 HAS R BEGUN SUPPLEMENTATION FOR CHILD - 2ND FROM THIS PREG"
|
||||
_column(161) byte frsteatd_n2 %2f "BH-3 AGE (MOS/WKS/DAY) WHEN 1ST SUPPLEMENTED - 2ND FROM THIS PREG"
|
||||
_column(163) byte frsteatd_p2 %1f "BH-3 UNITS (MOS/WKS/DAYS) FOR FRSTEATD_N - 2ND FROM THIS PREG"
|
||||
_column(164) byte frsteatd2 %2f "AGE (IN MOS) WHEN 1ST SUPPLEMENTED - 2ND FROM THIS PREG"
|
||||
_column(166) byte quitnurs2 %1f "BH-4 HAS R'STOPPED BREASTFEEDING CHILD - 2ND FROM THIS PREG"
|
||||
_column(167) byte ageqtnur_n2 %2f "BH-5 AGE (MOS/WKS/DAY) WHEN STOPPED BREASTFEEDING - 2ND FROM THIS PREG"
|
||||
_column(169) byte ageqtnur_p2 %1f "BH-5 UNITS (MOS/WKS/DAYS) FOR AGEQTNUR_N - 2ND FROM THIS PREG"
|
||||
_column(170) byte ageqtnur2 %2f "AGE (IN MOS) WHEN R'STOPPED NURSING CHILD - 2ND FROM THIS PREG"
|
||||
_column(172) byte matchfound3 %1f "CHECK ON WHETHER CHILD MATCHES BIO CHILD IN HH ROSTER - 3RD"
|
||||
_column(173) byte livehere3 %1f "BG-1 WHETHER CHILD LIVES WITH R - 3RD FROM THIS PREGNANCY"
|
||||
_column(174) byte alivenow3 %1f "BG-2 WHETHER CHILD IS STILL ALIVE - 3RD FROM THIS PREGNANCY"
|
||||
_column(175) int cmkidied3 %4f "CM FOR CHLD'S DATE OF DEATH - 3RD FROM THIS PREGNANCY"
|
||||
_column(179) int cmkidlft3 %4f "CM FOR DATE CHILD STOPPED LIVING W/R - 3RD FROM THIS PREGNANCY"
|
||||
_column(183) int lastage3 %3f "AGE (IN MOS) WHEN CHILD LAST LIVED W/R - 3RD FROM THIS PREGNANCY"
|
||||
_column(186) byte wherenow3 %1f "BG-5 WHERE CHILD LIVES NOW - 3RD FROM THIS PREGNANCY"
|
||||
_column(187) byte legagree3 %1f "BG-6 LEGAL AGREEMENT FOR WHERE CHILD LIVES - 3RD FROM THIS PREG"
|
||||
_column(188) byte parenend3 %1f "BG-7 IS R STILL LEGAL MOTHER OF CHILD - 3RD FROM THIS PREGNANCY"
|
||||
_column(189) byte anynurse3 %1f "BH-1 WHETHER R BREASTFED THIS CHILD AT ALL - 3RD FROM THIS PREG"
|
||||
_column(190) byte fedsolid3 %1f "BH-2 HAS R BEGUN SUPPLEMENTATION FOR CHILD - 3RD FROM THIS PREG"
|
||||
_column(191) byte frsteatd_n3 %1f "BH-3 AGE (MOS/WKS/DAY) WHEN 1ST SUPPLEMENTED - 3RD FROM THIS PREG"
|
||||
_column(192) byte frsteatd_p3 %1f "BH-3 UNITS (MOS/WKS/DAYS) FOR FRSTEATD_N - 3RD FROM THIS PREG"
|
||||
_column(193) byte frsteatd3 %1f "AGE (IN MOS) WHEN 1ST SUPPLEMENTED - 3RD FROM THIS PREG"
|
||||
_column(194) byte quitnurs3 %1f "BH-4 HAS R'STOPPED BREASTFEEDING CHILD - 3RD FROM THIS PREG"
|
||||
_column(195) byte ageqtnur_n3 %1f "BH-5 AGE (MOS/WKS/DAY) WHEN STOPPED BREASTFEEDING - 3RD FROM THIS PREG"
|
||||
_column(196) byte ageqtnur_p3 %1f "BH-5 UNITS (MOS/WKS/DAYS) FOR AGEQTNUR_N - 3RD FROM THIS PREG"
|
||||
_column(197) byte ageqtnur3 %1f "AGE (IN MOS) WHEN R'STOPPED NURSING CHILD - 3RD FROM THIS PREG"
|
||||
_column(198) int cmlastlb %4f "CM FOR R'S MOST RECENT LIVE BIRTH"
|
||||
_column(202) int cmfstprg %4f "CM FOR R'S FIRST COMPLETED PREGNANCY"
|
||||
_column(206) int cmlstprg %4f "CM FOR R'S MOST RECENT COMPLETED PREGNANCY"
|
||||
_column(210) int cmintstr %4f "CM FOR DATE OF BEGINNING OF PREGNANCY INTERVAL"
|
||||
_column(214) int cmintfin %4f "CM FOR DATE OF END OF PREGNANCY INTERVAL"
|
||||
_column(218) int cmintstrop %4f "OPEN INTERVAL: CM OF DATE OF BEGINNING"
|
||||
_column(222) int cmintfinop %4f "OPEN INTERVAL: CM OF DATE OF END (MON OF INTERVIEW)"
|
||||
_column(226) int cmintstrcr %4f "CURRENTLY PREGNANT: CM OF DATE OF BEGINNING OF INTERVAL"
|
||||
_column(230) int cmintfincr %4f "CURRENTLY PREGNANT: CM OF DATE OF END OF INTERVAL (MON OF INTERVIEW)"
|
||||
_column(234) byte evuseint %1f "EG-1 USE ANY METHOD IN PREGNANCY INTERVAL?"
|
||||
_column(235) byte stopduse %1f "EG-2 BEFORE YOU BECAME PREG, STOP USING ALL METHODS?"
|
||||
_column(236) byte whystopd %1f "EG-3 STOP USING METHODS BEFORE PREG BECAUSE WANTED PREG?"
|
||||
_column(237) byte whatmeth01 %2f "EG-4 METHOD(S) USING WHEN BECAME PREG - 1ST MENTION"
|
||||
_column(239) byte whatmeth02 %2f "EG-4 METHOD(S) USING WHEN BECAME PREG - 2ND MENTION"
|
||||
_column(241) byte whatmeth03 %2f "EG-4 METHOD(S) USING WHEN BECAME PREG - 3RD MENTION"
|
||||
_column(243) byte whatmeth04 %2f "EG-4 METHOD(S) USING WHEN BECAME PREG - 4TH MENTION"
|
||||
_column(245) byte resnouse %1f "EG-5 REASON NOT USING/HAD STOPPED USING METHOD BEC. WANTED PREG?"
|
||||
_column(246) byte wantbold %1f "EG-6 RIGHT BEF PREG, WANT TO HAVE BABY AT ANY TIME IN FUTURE?"
|
||||
_column(247) byte probbabe %1f "EG-7 PROBABLY WANT BABY AT ANY TIME OR NOT?"
|
||||
_column(248) byte cnfrmno %1f "EG-8 VERIFY DIDN'T WANT BABY AT ANY TIME IN FUTURE"
|
||||
_column(249) byte wantbld2 %1f "EG-9 RIGHT BEFORE PREG, WANT TO HAVE BABY AT ANY TIME IN FUTURE? (2ND ASKING)"
|
||||
_column(250) byte timingok %1f "EG-10 BECOME PREG TOO SOON, RIGHT TIME, OR LATER THAN YOU WANTED?"
|
||||
_column(251) int toosoon_n %3f "EG-11 HOW MUCH SOONER THAN WANTED BECAME PREG (MONTHS OR YEARS)"
|
||||
_column(254) byte toosoon_p %1f "EG-11 CHOOSE MONS OR YRS FOR HOW MUCH SOONER BECAME PREG THAN WANTED"
|
||||
_column(255) byte wthpart1 %1f "EG-12A RIGHT BEFORE PREG, WANT TO HAVE BABY WITH THAT PARTNER?"
|
||||
_column(256) byte wthpart2 %1f "EG-12B RIGHT BEF. PREG, THINK MIGHT EVER WANT TO HAVE BABY W/THAT PARTNER?"
|
||||
_column(257) byte feelinpg %2f "EG-13 HAPPINESS TO BE PREG. SCALE (1-10)"
|
||||
_column(259) byte hpwnold %1f "EG-16 RIGHT BEF PREG, DID THE FATHER WANT R TO HAVE BABY AT ANY TIME IN FUTURE?"
|
||||
_column(260) byte timokhp %1f "EG-17 R BECAME PREG SOONER, RIGHT TIME, OR LATER THAN FATHER OF PREG WANTED"
|
||||
_column(261) byte cohpbeg %1f "EG-18A WAS R LIVING W/FATHER OF PREG AT BEGINNING OF PREG"
|
||||
_column(262) byte cohpend %1f "EG-18B WAS R LIVING W/FATHER OF PREG WHEN PREG ENDED/BABY WAS BORN"
|
||||
_column(263) byte tellfath %1f "EG-19 DID R TELL FATHER OF PREG THAT SHE WAS PREGNANT"
|
||||
_column(264) byte whentell %1f "EG-20 WHEN DID R TELL FATHER OF PREG ABOUT PREGNANCY: DURING OR AFTER?"
|
||||
_column(265) byte tryscale %2f "EG-21 HOW HARD TRYING TO GET/AVOID PREGNANCY (0-10)"
|
||||
_column(267) byte wantscal %2f "EG-22 HOW MUCH WANTED TO GET/AVOID PREGNANCY (0-10)"
|
||||
_column(269) byte whyprg1 %1f "EG-23 (UNINTENDED PREG): METHOD FAIL OR R WASN T USING PROPERLY-1ST MENTION"
|
||||
_column(270) byte whyprg2 %1f "EG-23 (UNINTENDED PREG): METHOD FAIL OR R WASN T USING PROPERLY-2ND MENTION"
|
||||
_column(271) byte whynouse1 %1f "EG-24 (UNINTENDED PREG) REASON DIDN'T USE METHOD - 1ST MENTION"
|
||||
_column(272) byte whynouse2 %1f "EG-24 (UNINTENDED PREG) REASON DIDN'T USE METHOD - 2ND MENTION"
|
||||
_column(273) byte whynouse3 %1f "EG-24 (UNINTENDED PREG) REASON DIDN'T USE METHOD - 3RD MENTION"
|
||||
_column(274) byte anyusint %1f "ANY METHOD USE IN PREGNANCY INTERVAL"
|
||||
_column(275) byte prglngth %2f "DURATION OF COMPLETED PREGNANCY IN WEEKS"
|
||||
_column(277) byte outcome %1f "PREGNANCY OUTCOME"
|
||||
_column(278) byte birthord %2f "BIRTH ORDER"
|
||||
_column(280) int datend %4f "CM DATE PREGNANCY ENDED"
|
||||
_column(284) int agepreg %4f "AGE AT PREGNANCY OUTCOME"
|
||||
_column(288) int datecon %4f "CM DATE OF CONCEPTION"
|
||||
_column(292) int agecon %4f "AGE AT TIME OF CONCEPTION"
|
||||
_column(296) byte fmarout5 %1f "FORMAL MARITAL STATUS AT PREGNANCY OUTCOME"
|
||||
_column(297) byte pmarpreg %1f "WHETHER PREGNANCY ENDED BEFORE R'S 1ST MARRIAGE (PREMARITALLY)"
|
||||
_column(298) byte rmarout6 %1f "INFORMAL MARITAL STATUS AT PREGNANCY OUTCOME - 6 CATEGORIES"
|
||||
_column(299) byte fmarcon5 %1f "FORMAL MARITAL STATUS AT CONCEPTION - 5 CATEGORIES"
|
||||
_column(300) byte learnprg %2f "NUMBER OF WEEKS PREGNANT WHEN R LEARNED SHE WAS PREGNANT"
|
||||
_column(302) byte pncarewk %2f "NUMBER OF WEEKS PREGNANT AT FIRST PRENATAL CARE"
|
||||
_column(304) byte paydeliv %1f "PAYMENT FOR DELIVERY"
|
||||
_column(305) byte lbw1 %1f "LOW BIRTHWEIGHT - BABY 1"
|
||||
_column(306) int bfeedwks %3f "DURATION OF BREASTFEEDING IN WEEKS"
|
||||
_column(309) byte maternlv %1f "USE OF MATERNITY LEAVE"
|
||||
_column(310) byte oldwantr %1f "WANTEDNESS OF PREGNANCY - RESPONDENT - CYCLE 4 VERSION"
|
||||
_column(311) byte oldwantp %1f "WANTEDNESS OF PREG - R'S PARTNER (FATHER OF PREGNANCY) - CYCLE 4 VERSION"
|
||||
_column(312) byte wantresp %1f "WANTEDNESS OF PREGNANCY - RESPONDENT - CYCLE 5 VERSION"
|
||||
_column(313) byte wantpart %1f "WANTEDNESS OF PREG - R'S PARTNER (FATHER OF PREGNANCY) - CYCLE 5 VERSION"
|
||||
_column(314) int cmbirth %4f "CENTURY MONTH OF R'S BIRTH"
|
||||
_column(318) byte ager %2f "AGE AT INTERVIEW"
|
||||
_column(320) byte agescrn %2f "R'S AGE AT SCREENER"
|
||||
_column(322) byte fmarital %1f "FORMAL MARITAL STATUS"
|
||||
_column(323) byte rmarital %1f "INFORMAL MARITAL STATUS"
|
||||
_column(324) byte educat %2f "EDUCATION (COMPLETED YEARS OF SCHOOLING)"
|
||||
_column(326) byte hieduc %2f "HIGHEST COMPLETED YEAR OF SCHOOL OR DEGREE"
|
||||
_column(328) byte race %1f "RACE"
|
||||
_column(329) byte hispanic %1f "HISPANIC ORIGIN"
|
||||
_column(330) byte hisprace %1f "RACE AND HISPANIC ORIGIN"
|
||||
_column(331) byte rcurpreg %1f "PREGNANT AT TIME OF INTERVIEW"
|
||||
_column(332) byte pregnum %2f "CAPI-BASED TOTAL NUMBER OF PREGNANCIES"
|
||||
_column(334) byte parity %2f "TOTAL NUMBER OF LIVE BIRTHS"
|
||||
_column(336) byte insuranc %1f "HEALTH INSURANCE COVERAGE STATUS"
|
||||
_column(337) byte pubassis %1f "WHETHER R RECEIVED PUBLIC ASSISTANCE IN 2001"
|
||||
_column(338) int poverty %3f "POVERTY LEVEL INCOME"
|
||||
_column(341) byte laborfor %1f "LABOR FORCE STATUS"
|
||||
_column(342) byte religion %1f "CURRENT RELIGIOUS AFFILIATION"
|
||||
_column(343) byte metro %1f "PLACE OF RESIDENCE (METROPOLITAN / NONMETROPOLITAN)"
|
||||
_column(344) byte brnout %1f "IB-8 R BORN OUTSIDE OF US"
|
||||
_column(345) int yrstrus %4f "YEAR R CAME TO THE UNITED STATES"
|
||||
_column(349) byte prglngth_i %1f "PRGLNGTH IMPUTATION FLAG"
|
||||
_column(350) byte outcome_i %1f "OUTCOME IMPUTATION FLAG"
|
||||
_column(351) byte birthord_i %1f "BIRTHORD IMPUTATION FLAG"
|
||||
_column(352) byte datend_i %1f "DATEND IMPUTATION FLAG"
|
||||
_column(353) byte agepreg_i %1f "AGEPREG IMPUTATION FLAG"
|
||||
_column(354) byte datecon_i %1f "DATECON IMPUTATION FLAG"
|
||||
_column(355) byte agecon_i %1f "AGECON IMPUTATION FLAG"
|
||||
_column(356) byte fmarout5_i %1f "FMAROUT5 IMPUTATION FLAG"
|
||||
_column(357) byte pmarpreg_i %1f "PMARPREG IMPUTATION FLAG"
|
||||
_column(358) byte rmarout6_i %1f "RMAROUT6 IMPUTATION FLAG"
|
||||
_column(359) byte fmarcon5_i %1f "FMARCON5 IMPUTATION FLAG"
|
||||
_column(360) byte learnprg_i %1f "LEARNPRG IMPUTATION FLAG"
|
||||
_column(361) byte pncarewk_i %1f "PNCAREWK IMPUTATION FLAG"
|
||||
_column(362) byte paydeliv_i %1f "PAYDELIV IMPUTATION FLAG"
|
||||
_column(363) byte lbw1_i %1f "LBW1 IMPUTATION FLAG"
|
||||
_column(364) byte bfeedwks_i %1f "BFEEDWKS IMPUTATION FLAG"
|
||||
_column(365) byte maternlv_i %1f "MATERNLV IMPUTATION FLAG"
|
||||
_column(366) byte oldwantr_i %1f "OLDWANTR IMPUTATION FLAG"
|
||||
_column(367) byte oldwantp_i %1f "OLDWANTP IMPUTATION FLAG"
|
||||
_column(368) byte wantresp_i %1f "WANTRESP IMPUTATION FLAG"
|
||||
_column(369) byte wantpart_i %1f "WANTPART IMPUTATION FLAG"
|
||||
_column(370) byte ager_i %1f "AGER IMPUTATION FLAG"
|
||||
_column(371) byte fmarital_i %1f "FMARITAL IMPUTATION FLAG"
|
||||
_column(372) byte rmarital_i %1f "RMARITAL IMPUTATION FLAG"
|
||||
_column(373) byte educat_i %1f "EDUCAT IMPUTATION FLAG"
|
||||
_column(374) byte hieduc_i %1f "HIEDUC IMPUTATION FLAG"
|
||||
_column(375) byte race_i %1f "RACE IMPUTATION FLAG"
|
||||
_column(376) byte hispanic_i %1f "HISPANIC IMPUTATION FLAG"
|
||||
_column(377) byte hisprace_i %1f "HISPRACE IMPUTATION FLAG"
|
||||
_column(378) byte rcurpreg_i %1f "RCURPREG IMPUTATION FLAG"
|
||||
_column(379) byte pregnum_i %1f "PREGNUM IMPUTATION FLAG"
|
||||
_column(380) byte parity_i %1f "PARITY IMPUTATION FLAG"
|
||||
_column(381) byte insuranc_i %1f "INSURANC IMPUTATION FLAG"
|
||||
_column(382) byte pubassis_i %1f "PUBASSIS IMPUTATION FLAG"
|
||||
_column(383) byte poverty_i %1f "POVERTY IMPUTATION FLAG"
|
||||
_column(384) byte laborfor_i %1f "LABORFOR IMPUTATION FLAG"
|
||||
_column(385) byte religion_i %1f "RELIGION IMPUTATION FLAG"
|
||||
_column(386) byte metro_i %1f "METRO IMPUTATION FLAG"
|
||||
_column(387) float basewgt %18f "BASE WEIGHT"
|
||||
_column(405) double adj_mod_basewgt %18f "ADJUSTED MODIFIED BASE WEIGHT"
|
||||
_column(423) double finalwgt %18f "FINAL POST-STRATIFIED AND ADJUSTED WEIGHT"
|
||||
_column(441) byte secu_p %1f "SCRAMBLED VERSION OF THE SAMPLING ERROR COMPUTATIONAL UNIT"
|
||||
_column(442) byte sest %2f "SCRAMBLED VERSION OF THE STRATUM"
|
||||
_column(444) int cmintvw %4f "CENTURY MONTH OF INTERVIEW DATE"
|
||||
}
|
160
scipy/first.py
Normal file
160
scipy/first.py
Normal file
|
@ -0,0 +1,160 @@
|
|||
"""This file contains code used in "Think Stats",
|
||||
by Allen B. Downey, available from greenteapress.com
|
||||
|
||||
Copyright 2014 Allen B. Downey
|
||||
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
import nsfg
|
||||
import thinkstats2
|
||||
import thinkplot
|
||||
|
||||
|
||||
def MakeFrames():
    """Loads the pregnancy data and splits out the live births.

    Live births are further partitioned by birth order into first
    babies and everything else.  The hard-coded row counts act as
    sanity checks on the data returned by nsfg.ReadFemPreg.

    returns: tuple of DataFrames (all live births, first babies, others)
    """
    pregnancies = nsfg.ReadFemPreg()

    # keep only the records whose outcome code is 1, then split on birthord
    live = pregnancies[pregnancies.outcome == 1]
    birth_order = live.birthord
    firsts = live[birth_order == 1]
    others = live[birth_order != 1]

    frames = (live, firsts, others)
    expected_sizes = (9148, 4413, 4735)
    for frame, expected in zip(frames, expected_sizes):
        assert len(frame) == expected

    return frames
|
||||
|
||||
|
||||
def Summarize(live, firsts, others):
    """Prints summary statistics of pregnancy length.

    Reports mean/variance/std for all live births, then compares first
    babies with the others (means, variances, difference in several
    units, and Cohen's effect size).

    live: DataFrame of all live births
    firsts: DataFrame of first babies
    others: DataFrame of the remaining live births
    """
    all_lengths = live.prglngth
    first_lengths = firsts.prglngth
    other_lengths = others.prglngth

    print('Live mean', all_lengths.mean())
    print('Live variance', all_lengths.var())
    print('Live std', all_lengths.std())

    first_mean = first_lengths.mean()
    other_mean = other_lengths.mean()

    print('Mean')
    print('First babies', first_mean)
    print('Others', other_mean)

    print('Variance')
    print('First babies', first_lengths.var())
    print('Others', other_lengths.var())

    # difference between the group means, in weeks
    print('Difference in weeks', first_mean - other_mean)
    print('Difference in hours', (first_mean - other_mean) * 7 * 24)

    print('Difference relative to 39 weeks',
          (first_mean - other_mean) / 39 * 100)

    effect_size = thinkstats2.CohenEffectSize(first_lengths, other_lengths)
    print('Cohen d', effect_size)
|
||||
|
||||
|
||||
def PrintExtremes(live):
    """Saves a histogram of pregnancy lengths and prints its extremes.

    The ten smallest and ten largest pregnancy lengths are printed
    together with their frequencies.

    live: DataFrame of live births
    """
    hist = thinkstats2.Hist(live.prglngth)
    thinkplot.Hist(hist, label='live births')

    save_options = dict(root='first_nsfg_hist_live',
                        title='Histogram',
                        xlabel='weeks',
                        ylabel='frequency')
    thinkplot.Save(**save_options)

    # each section is looked up lazily so the output order matches the
    # order in which the Hist is queried
    sections = [('Shortest lengths:', hist.Smallest),
                ('Longest lengths:', hist.Largest)]
    for heading, extremes in sections:
        print(heading)
        for weeks, freq in extremes(10):
            print(weeks, freq)
|
||||
|
||||
|
||||
def MakeHists(live):
    """Plots and saves histograms of four variables for live births.

    One figure each is saved for birthwgt_lb, birthwgt_oz, agepreg
    (rounded down with np.floor) and prglngth.

    live: DataFrame
    """
    # (column name, optional transform, keyword arguments for thinkplot.Save)
    plots = [
        ('birthwgt_lb', None, dict(root='first_wgt_lb_hist',
                                   xlabel='pounds',
                                   ylabel='frequency',
                                   axis=[-1, 14, 0, 3200])),
        ('birthwgt_oz', None, dict(root='first_wgt_oz_hist',
                                   xlabel='ounces',
                                   ylabel='frequency',
                                   axis=[-1, 16, 0, 1200])),
        ('agepreg', np.floor, dict(root='first_agepreg_hist',
                                   xlabel='years',
                                   ylabel='frequency')),
        ('prglngth', None, dict(root='first_prglngth_hist',
                                xlabel='weeks',
                                ylabel='frequency',
                                axis=[-1, 53, 0, 5000])),
    ]

    for column, transform, save_options in plots:
        values = getattr(live, column)
        if transform is not None:
            values = transform(values)

        # the histogram label is the column name itself
        hist = thinkstats2.Hist(values, label=column)
        thinkplot.Hist(hist)
        thinkplot.Save(**save_options)
|
||||
|
||||
|
||||
def MakeComparison(firsts, others):
    """Saves side-by-side histograms of pregnancy length for two groups.

    The bars for first babies are aligned right and those for the
    others aligned left, so the two histograms sit next to each other.

    firsts: DataFrame
    others: DataFrame
    """
    bar_width = 0.45
    aligned_hists = [
        (thinkstats2.Hist(firsts.prglngth, label='first'), 'right'),
        (thinkstats2.Hist(others.prglngth, label='other'), 'left'),
    ]

    # tell thinkplot that two series are coming
    thinkplot.PrePlot(2)
    for hist, side in aligned_hists:
        thinkplot.Hist(hist, align=side, width=bar_width)

    save_options = dict(root='first_nsfg_hist',
                        title='Histogram',
                        xlabel='weeks',
                        ylabel='frequency',
                        axis=[27, 46, 0, 2700])
    thinkplot.Save(**save_options)
|
||||
|
||||
|
||||
def main(script):
    """Runs the whole analysis: histograms, extremes, comparison, summary.

    script: string script name (not used by the body)
    """
    frames = MakeFrames()
    live, firsts, others = frames

    MakeHists(live)
    PrintExtremes(live)
    MakeComparison(firsts, others)
    Summarize(*frames)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: the command line is unpacked into main(), so
    # argv[0] (the script name) becomes the `script` argument.
    import sys
    command_line = sys.argv
    main(*command_line)
|
||||
|
||||
|
652
scipy/hypothesis.ipynb
Normal file
652
scipy/hypothesis.ipynb
Normal file
File diff suppressed because one or more lines are too long
106
scipy/nsfg.py
Normal file
106
scipy/nsfg.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
"""This file contains code for use with "Think Stats",
|
||||
by Allen B. Downey, available from greenteapress.com
|
||||
|
||||
Copyright 2010 Allen B. Downey
|
||||
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
import thinkstats2
|
||||
|
||||
|
||||
def ReadFemPreg(dct_file='2002FemPreg.dct',
                dat_file='2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data and cleans it.

    The Stata dictionary describes the fixed-width layout of the
    gzip-compressed data file; the resulting frame is passed through
    CleanFemPreg before it is returned.

    dct_file: string file name
    dat_file: string file name

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    frame = layout.ReadFixedWidth(dat_file, compression='gzip')

    # CleanFemPreg recodes the frame in place
    CleanFemPreg(frame)
    return frame
|
||||
|
||||
|
||||
def CleanFemPreg(df):
    """Recodes variables from the pregnancy frame.

    Modifies df in place (nothing is returned):
      - agepreg is converted from centiyears to years
      - implausible or special-coded values in birthwgt_lb, birthwgt_oz,
        hpagelb, babysex and nbrnaliv are replaced with NaN
      - a new column totalwgt_lb combines pounds and ounces
      - cmintvw is set to NaN

    Every change is written back with df[...] = ... rather than through
    an intermediate Series (chained indexing or replace(inplace=True) on
    an attribute).  Those forms operate on a temporary object and do not
    update the frame when pandas copy-on-write is active.

    df: DataFrame
    """
    # mother's age is encoded in centiyears; convert to years
    df['agepreg'] = df['agepreg'] / 100.0

    # birthwgt_lb contains at least one bogus value (51 lbs);
    # mask() replaces the selected entries with NaN
    pounds = df['birthwgt_lb']
    df['birthwgt_lb'] = pounds.mask(pounds > 20)

    # replace 'not ascertained', 'refused', 'don't know' with NaN
    na_vals = [97, 98, 99]
    for column in ('birthwgt_lb', 'birthwgt_oz', 'hpagelb'):
        df[column] = df[column].replace(na_vals, np.nan)

    df['babysex'] = df['babysex'].replace([7, 9], np.nan)
    df['nbrnaliv'] = df['nbrnaliv'].replace([9], np.nan)

    # birthweight is stored in two columns, lbs and oz.
    # convert to a single column in lb
    # NOTE: creating a new column requires dictionary syntax,
    # not attribute assignment (like df.totalwgt_lb)
    df['totalwgt_lb'] = df['birthwgt_lb'] + df['birthwgt_oz'] / 16.0

    # due to a bug in ReadStataDct, the last variable gets clipped;
    # so for now set it to NaN
    df['cmintvw'] = np.nan
|
||||
|
||||
|
||||
def MakePregMap(df):
    """Make a map from caseid to list of preg indices.

    Walks the caseid column in row order and groups the index labels
    of the rows that share a caseid.

    df: DataFrame with a caseid column

    returns: defaultdict(list) that maps from caseid to list of indices
             into preg df
    """
    d = defaultdict(list)
    # Series.items() yields (index, value) pairs; the old iteritems()
    # alias was removed in pandas 2.0
    for index, caseid in df.caseid.items():
        d[caseid].append(index)
    return d
|
||||
|
||||
|
||||
def main(script):
    """Tests the functions in this module.

    Reads the pregnancy data and checks the number of rows, one caseid
    and the frequency of selected values against known counts.

    script: string script name
    """
    df = ReadFemPreg()
    print(df.shape)

    assert len(df) == 13593

    assert df.caseid[13592] == 12571

    # (column, value, number of rows expected to hold that value)
    expected_counts = [
        ('pregordr', 1, 5033),
        ('nbrnaliv', 1, 8981),
        ('babysex', 1, 4641),
        ('birthwgt_lb', 7, 3049),
        ('birthwgt_oz', 0, 1037),
        ('prglngth', 39, 4744),
        ('outcome', 1, 9148),
        ('birthord', 1, 4413),
        ('agepreg', 22.75, 100),
        ('totalwgt_lb', 7.5, 302),
    ]
    for column, value, count in expected_counts:
        assert df[column].value_counts()[value] == count

    # the largest final weight is expected to occur six times
    weights = df.finalwgt.value_counts()
    largest_weight = max(weights.keys())
    assert weights[largest_weight] == 6

    print('%s: All tests passed.' % script)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: the command line is unpacked into main(), so
    # argv[0] (the script name) becomes the `script` argument.
    command_line = sys.argv
    main(*command_line)
|
716
scipy/thinkplot.py
Normal file
716
scipy/thinkplot.py
Normal file
|
@ -0,0 +1,716 @@
|
|||
"""This file contains code for use with "Think Stats",
|
||||
by Allen B. Downey, available from greenteapress.com
|
||||
|
||||
Copyright 2014 Allen B. Downey
|
||||
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as pyplot
|
||||
import numpy as np
|
||||
import pandas
|
||||
|
||||
import warnings
|
||||
|
||||
# customize some matplotlib attributes
|
||||
#matplotlib.rc('figure', figsize=(4, 3))
|
||||
|
||||
#matplotlib.rc('font', size=14.0)
|
||||
#matplotlib.rc('axes', labelsize=22.0, titlesize=22.0)
|
||||
#matplotlib.rc('legend', fontsize=20.0)
|
||||
|
||||
#matplotlib.rc('xtick.major', size=6.0)
|
||||
#matplotlib.rc('xtick.minor', size=3.0)
|
||||
|
||||
#matplotlib.rc('ytick.major', size=6.0)
|
||||
#matplotlib.rc('ytick.minor', size=3.0)
|
||||
|
||||
|
||||
class _Brewer(object):
|
||||
"""Encapsulates a nice sequence of colors.
|
||||
|
||||
Shades of blue that look good in color and can be distinguished
|
||||
in grayscale (up to a point).
|
||||
|
||||
Borrowed from http://colorbrewer2.org/
|
||||
"""
|
||||
color_iter = None
|
||||
|
||||
colors = ['#081D58',
|
||||
'#253494',
|
||||
'#225EA8',
|
||||
'#1D91C0',
|
||||
'#41B6C4',
|
||||
'#7FCDBB',
|
||||
'#C7E9B4',
|
||||
'#EDF8B1',
|
||||
'#FFFFD9']
|
||||
|
||||
# lists that indicate which colors to use depending on how many are used
|
||||
which_colors = [[],
|
||||
[1],
|
||||
[1, 3],
|
||||
[0, 2, 4],
|
||||
[0, 2, 4, 6],
|
||||
[0, 2, 3, 5, 6],
|
||||
[0, 2, 3, 4, 5, 6],
|
||||
[0, 1, 2, 3, 4, 5, 6],
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def Colors(cls):
|
||||
"""Returns the list of colors.
|
||||
"""
|
||||
return cls.colors
|
||||
|
||||
@classmethod
|
||||
def ColorGenerator(cls, n):
|
||||
"""Returns an iterator of color strings.
|
||||
|
||||
n: how many colors will be used
|
||||
"""
|
||||
for i in cls.which_colors[n]:
|
||||
yield cls.colors[i]
|
||||
raise StopIteration('Ran out of colors in _Brewer.ColorGenerator')
|
||||
|
||||
@classmethod
|
||||
def InitializeIter(cls, num):
|
||||
"""Initializes the color iterator with the given number of colors."""
|
||||
cls.color_iter = cls.ColorGenerator(num)
|
||||
|
||||
@classmethod
|
||||
def ClearIter(cls):
|
||||
"""Sets the color iterator to None."""
|
||||
cls.color_iter = None
|
||||
|
||||
@classmethod
|
||||
def GetIter(cls):
|
||||
"""Gets the color iterator."""
|
||||
if cls.color_iter is None:
|
||||
cls.InitializeIter(7)
|
||||
|
||||
return cls.color_iter
|
||||
|
||||
|
||||
def PrePlot(num=None, rows=None, cols=None):
    """Records hints about the plots that are about to be drawn.

    num: number of lines that will be plotted
    rows: number of rows of subplots
    cols: number of columns of subplots
    """
    global SUBPLOT_ROWS, SUBPLOT_COLS

    if num:
        _Brewer.InitializeIter(num)

    # no subplot layout requested: nothing more to do
    if rows is None and cols is None:
        return

    # at least one dimension was given; the missing one defaults to 1
    if cols is None:
        cols = 1
    elif rows is None:
        rows = 1

    # resize the image, depending on the number of rows and cols
    size_map = {
        (1, 1): (8, 6),
        (1, 2): (14, 6),
        (1, 3): (14, 6),
        (2, 2): (10, 10),
        (2, 3): (16, 10),
        (3, 1): (8, 10),
    }
    figsize = size_map.get((rows, cols))
    if figsize is not None:
        pyplot.gcf().set_size_inches(*figsize)

    # create the first subplot and remember the grid for SubPlot
    if rows > 1 or cols > 1:
        pyplot.subplot(rows, cols, 1)
        SUBPLOT_ROWS = rows
        SUBPLOT_COLS = cols
|
||||
|
||||
|
||||
def SubPlot(plot_number, rows=None, cols=None):
    """Selects a subplot, optionally changing the subplot grid.

    plot_number: int
    rows: int; when falsy, the module-level SUBPLOT_ROWS is used
    cols: int; when falsy, the module-level SUBPLOT_COLS is used
    """
    if not rows:
        rows = SUBPLOT_ROWS
    if not cols:
        cols = SUBPLOT_COLS
    pyplot.subplot(rows, cols, plot_number)
|
||||
|
||||
|
||||
def _Underride(d, **options):
|
||||
"""Add key-value pairs to d only if key is not in d.
|
||||
|
||||
If d is None, create a new dictionary.
|
||||
|
||||
d: dictionary
|
||||
options: keyword args to add to d
|
||||
"""
|
||||
if d is None:
|
||||
d = {}
|
||||
|
||||
for key, val in options.items():
|
||||
d.setdefault(key, val)
|
||||
|
||||
return d
|
||||
|
||||
|
||||
def Clf():
    """Clears the current figure and resets the legend location and colors."""
    global LOC
    LOC = None
    _Brewer.ClearIter()
    pyplot.clf()
    # restore the default figure size
    pyplot.gcf().set_size_inches(8, 6)
|
||||
|
||||
|
||||
def Figure(**options):
    """Calls pyplot.figure with the given options.

    options: keyword args passed to pyplot.figure; figsize defaults to (6, 8)
    """
    options.setdefault('figsize', (6, 8))
    pyplot.figure(**options)
|
||||
|
||||
|
||||
def _UnderrideColor(options):
    """Adds the next Brewer color to options unless a color is already set.

    options: dictionary of plot options (modified in place)

    Returns: the same dictionary
    """
    if 'color' not in options:
        color_iter = _Brewer.GetIter()

        if color_iter:
            try:
                options['color'] = next(color_iter)
            except StopIteration:
                # TODO: reconsider whether this should warn
                # warnings.warn('Warning: Brewer ran out of colors.')
                _Brewer.ClearIter()

    return options
|
||||
|
||||
|
||||
def Plot(obj, ys=None, style='', **options):
    """Draws a line with pyplot.plot.

    Args:
      obj: sequence of x values, or Series, or anything with Render()
      ys: sequence of y values
      style: style string passed along to pyplot.plot
      options: keyword args passed to pyplot.plot
    """
    options = _UnderrideColor(options)
    options = _Underride(options, linewidth=3, alpha=0.8,
                         label=getattr(obj, 'label', '_nolegend_'))

    xs = obj
    if ys is None:
        # obj may carry both coordinates itself
        if hasattr(obj, 'Render'):
            xs, ys = obj.Render()
        if isinstance(obj, pandas.Series):
            xs, ys = obj.index, obj.values

    # with no ys available, xs alone is handed to pyplot.plot
    args = (xs, style) if ys is None else (xs, ys, style)
    pyplot.plot(*args, **options)
|
||||
|
||||
|
||||
def FillBetween(xs, y1, y2=None, where=None, **options):
    """Fills the region between two curves with pyplot.fill_between.

    Args:
      xs: sequence of x values
      y1: sequence of y values
      y2: sequence of y values
      where: sequence of boolean
      options: keyword args passed to pyplot.fill_between
    """
    fill_options = _Underride(_UnderrideColor(options), linewidth=0, alpha=0.5)
    pyplot.fill_between(xs, y1, y2, where, **fill_options)
|
||||
|
||||
|
||||
def Bar(xs, ys, **options):
    """Draws a bar plot with pyplot.bar.

    Args:
      xs: sequence of x values
      ys: sequence of y values
      options: keyword args passed to pyplot.bar
    """
    bar_options = _Underride(_UnderrideColor(options), linewidth=0, alpha=0.6)
    pyplot.bar(xs, ys, **bar_options)
|
||||
|
||||
|
||||
def Scatter(xs, ys=None, **options):
    """Draws a scatter plot with pyplot.scatter.

    xs: x values, or a pandas Series when ys is omitted
    ys: y values
    options: options passed to pyplot.scatter
    """
    defaults = dict(color='blue', alpha=0.2, s=30, edgecolors='none')
    options = _Underride(options, **defaults)

    # a lone Series supplies x from its index and y from its values
    if ys is None and isinstance(xs, pandas.Series):
        series = xs
        xs, ys = series.index, series.values

    pyplot.scatter(xs, ys, **options)
|
||||
|
||||
|
||||
def HexBin(xs, ys, **options):
    """Draws a hexagonal binning plot with pyplot.hexbin.

    xs: x values
    ys: y values
    options: options passed to pyplot.hexbin; cmap defaults to Blues
    """
    hexbin_options = _Underride(options, cmap=matplotlib.cm.Blues)
    pyplot.hexbin(xs, ys, **hexbin_options)
|
||||
|
||||
|
||||
def Pdf(pdf, **options):
    """Renders a Pdf, Pmf, or Hist and draws it as a line.

    Args:
      pdf: Pdf, Pmf, or Hist object
      options: keyword args passed to pyplot.plot; the keys 'low', 'high'
               and 'n' are removed and passed to pdf.Render instead
    """
    render_defaults = (('low', None), ('high', None), ('n', 101))
    render_args = {}
    for key, default in render_defaults:
        render_args[key] = options.pop(key, default)

    xs, ps = pdf.Render(**render_args)
    Plot(xs, ps, **_Underride(options, label=pdf.label))
|
||||
|
||||
|
||||
def Pdfs(pdfs, **options):
    """Draws each PDF in a sequence by calling Pdf on it.

    The same options are used for every PDF.  For per-PDF options,
    call Pdf once per object instead.

    Args:
      pdfs: sequence of PDF objects
      options: keyword args passed to pyplot.plot
    """
    for each_pdf in pdfs:
        Pdf(each_pdf, **options)
|
||||
|
||||
|
||||
def Hist(hist, **options):
    """Plots a Pmf or Hist with a bar plot.

    The default width of the bars is based on the minimum difference
    between values in the Hist.  If that's too small, you can override
    it by providing a width keyword argument, in the same units
    as the values.

    If the width cannot be computed (non-numeric values, or fewer than
    two values) and none is provided, the width is left to pyplot.bar.

    Args:
      hist: Hist or Pmf object
      options: keyword args passed to pyplot.bar; align may be 'center'
               (default), 'left' or 'right'
    """
    xs, ys = hist.Render()

    if 'width' not in options:
        try:
            # bars span 90% of the minimum distance between adjacent values
            options['width'] = 0.9 * np.diff(xs).min()
        except TypeError:
            warnings.warn("Hist: Can't compute bar width automatically. "
                          "Check for non-numeric types in Hist. "
                          "Or try providing width option."
                          )
        except ValueError:
            # Fewer than two values: np.diff is empty and min() has nothing
            # to reduce.  There is no gap to measure, so fall through and
            # let pyplot.bar use its default width.
            pass

    options = _Underride(options, label=hist.label)
    options = _Underride(options, align='center')

    # pyplot.bar only understands 'center' and 'edge'; translate the rest
    if options['align'] == 'left':
        options['align'] = 'edge'
    elif options['align'] == 'right':
        options['align'] = 'edge'
        # A negative width makes an edge-aligned bar extend to the left.
        # 0.8 is pyplot.bar's default width, used when none was computed.
        options['width'] = -options.get('width', 0.8)

    Bar(xs, ys, **options)
|
||||
|
||||
|
||||
def Hists(hists, **options):
    """Draws each histogram in a sequence by calling Hist on it.

    The same options are used for every histogram.  For per-histogram
    options, call Hist once per object instead.

    Args:
      hists: sequence of Hist or Pmf objects
      options: keyword args passed along to Hist
    """
    for each_hist in hists:
        Hist(each_hist, **options)
|
||||
|
||||
|
||||
def Pmf(pmf, **options):
    """Plots a Pmf or Hist as a step-shaped line.

    Args:
      pmf: Hist or Pmf object
      options: keyword args passed to pyplot.plot; 'width' (step width,
               defaults to the minimum gap between values) and 'align'
               ('center' by default, or 'right'; any other value leaves
               the steps starting at each x) are consumed here
    """
    xs, ys = pmf.Render()

    width = options.pop('width', None)
    if width is None:
        try:
            width = np.diff(xs).min()
        except TypeError:
            warnings.warn("Pmf: Can't compute bar width automatically. "
                          "Check for non-numeric types in Pmf. "
                          "Or try providing width option.")

    # Build the outline of the steps as a list of (x, y) vertices.
    points = []

    # lastx starts as NaN so the gap test below is False for the first value
    lastx = np.nan
    lasty = 0
    for x, y in zip(xs, ys):
        # a gap since the previous step: drop to zero across the gap
        if (x - lastx) > 1e-5:
            points.append((lastx, 0))
            points.append((x, 0))

        # vertical edge from the previous height, then the top of this step
        points.append((x, lasty))
        points.append((x, y))
        points.append((x+width, y))

        lastx = x + width
        lasty = y

    # close the final step down to zero
    points.append((lastx, 0))
    pxs, pys = zip(*points)

    # shift the steps so each x is at the center or right edge of its step
    align = options.pop('align', 'center')
    if align == 'center':
        pxs = np.array(pxs) - width/2.0
    if align == 'right':
        pxs = np.array(pxs) - width

    options = _Underride(options, label=pmf.label)
    Plot(pxs, pys, **options)
|
||||
|
||||
|
||||
def Pmfs(pmfs, **options):
    """Draws each PMF in a sequence by calling Pmf on it.

    The same options are used for every PMF.  For per-PMF options,
    call Pmf once per object instead.

    Args:
      pmfs: sequence of PMF objects
      options: keyword args passed to pyplot.plot
    """
    for each_pmf in pmfs:
        Pmf(each_pmf, **options)
|
||||
|
||||
|
||||
def Diff(t):
    """Computes the differences between adjacent elements in a sequence.

    Args:
        t: sequence of numbers

    Returns:
        list of differences t[i] - t[i-1] (length one less than t)
    """
    return [t[i] - t[i - 1] for i in range(1, len(t))]
|
||||
|
||||
|
||||
def Cdf(cdf, complement=False, transform=None, **options):
    """Plots a CDF as a line.

    Args:
      cdf: Cdf object
      complement: boolean, whether to plot the complementary CDF
      transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel'
      options: keyword args passed to pyplot.plot; 'xscale' and 'yscale'
               are removed and returned in the scale dictionary instead

    Returns:
      dictionary with the scale options that should be passed to
      Config, Show or Save.
    """
    xs, ps = cdf.Render()
    xs = np.asarray(xs)
    ps = np.asarray(ps)

    scale = dict(xscale='linear', yscale='linear')

    # caller-supplied scales are the starting point; transforms may override
    for s in ['xscale', 'yscale']:
        if s in options:
            scale[s] = options.pop(s)

    if transform == 'exponential':
        complement = True
        scale['yscale'] = 'log'

    if transform == 'pareto':
        complement = True
        scale['yscale'] = 'log'
        scale['xscale'] = 'log'

    if complement:
        ps = [1.0-p for p in ps]

    if transform == 'weibull':
        # drop the last point before taking log(1 - p)
        xs = np.delete(xs, -1)
        ps = np.delete(ps, -1)
        ps = [-math.log(1.0-p) for p in ps]
        scale['xscale'] = 'log'
        scale['yscale'] = 'log'

    if transform == 'gumbel':
        # drop the first point before taking log(p)
        # (was `xp.delete`, an undefined name that raised NameError)
        xs = np.delete(xs, 0)
        ps = np.delete(ps, 0)
        ps = [-math.log(p) for p in ps]
        scale['yscale'] = 'log'

    options = _Underride(options, label=cdf.label)
    Plot(xs, ps, **options)
    return scale
|
||||
|
||||
|
||||
def Cdfs(cdfs, complement=False, transform=None, **options):
    """Draws each CDF in a sequence by calling Cdf on it.

    cdfs: sequence of CDF objects
    complement: boolean, whether to plot the complementary CDF
    transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel'
    options: keyword args passed to pyplot.plot
    """
    for each_cdf in cdfs:
        Cdf(each_cdf, complement, transform, **options)
|
||||
|
||||
|
||||
def Contour(obj, pcolor=False, contour=True, imshow=False, **options):
    """Makes a contour plot from a mapping of (x, y) pairs to z values.

    obj: map from (x, y) to z, or object that provides GetDict
    pcolor: boolean, whether to make a pseudocolor plot
    contour: boolean, whether to make a contour plot
    imshow: boolean, whether to use pyplot.imshow
    options: keyword args passed to pyplot.pcolor and/or pyplot.contour
    """
    try:
        d = obj.GetDict()
    except AttributeError:
        d = obj

    _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues)

    # the distinct x and y coordinates, in sorted order, define the grid
    key_xs, key_ys = zip(*d.keys())
    xs = sorted(set(key_xs))
    ys = sorted(set(key_ys))
    X, Y = np.meshgrid(xs, ys)

    def lookup(x, y):
        # grid points missing from the map get z = 0
        return d.get((x, y), 0)

    Z = np.vectorize(lookup)(X, Y)

    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
    pyplot.gca().xaxis.set_major_formatter(x_formatter)

    if pcolor:
        pyplot.pcolormesh(X, Y, Z, **options)
    if contour:
        cs = pyplot.contour(X, Y, Z, **options)
        pyplot.clabel(cs, inline=1, fontsize=10)
    if imshow:
        extent = xs[0], xs[-1], ys[0], ys[-1]
        pyplot.imshow(Z, extent=extent, **options)
|
||||
|
||||
|
||||
def Pcolor(xs, ys, zs, pcolor=True, contour=False, **options):
    """Makes a pseudocolor and/or contour plot of gridded values.

    xs: x coordinates passed to np.meshgrid
    ys: y coordinates passed to np.meshgrid
    zs: values plotted over the meshgrid of xs and ys
    pcolor: boolean, whether to make a pseudocolor plot
    contour: boolean, whether to make a contour plot
    options: keyword args passed to pyplot.pcolor and/or pyplot.contour
    """
    _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues)

    X, Y = np.meshgrid(xs, ys)

    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
    pyplot.gca().xaxis.set_major_formatter(x_formatter)

    if pcolor:
        pyplot.pcolormesh(X, Y, zs, **options)

    if contour:
        contour_set = pyplot.contour(X, Y, zs, **options)
        pyplot.clabel(contour_set, inline=1, fontsize=10)
|
||||
|
||||
|
||||
def Text(x, y, s, **options):
    """Writes text at a position in the figure with pyplot.text.

    x: number
    y: number
    s: string
    options: keyword args passed to pyplot.text
    """
    defaults = dict(fontsize=16,
                    verticalalignment='top',
                    horizontalalignment='left')
    pyplot.text(x, y, s, **_Underride(options, **defaults))
|
||||
|
||||
|
||||
# Module-level plot state read and updated by Config.
# LEGEND: whether Config draws a legend; overridden by Config(legend=...).
LEGEND = True
# LOC: legend location passed to pyplot.legend; set by Config(loc=...)
# and reset to None by Clf.
LOC = None
|
||||
|
||||
def Config(**options):
    """Configures the plot.

    Pulls options out of the option dictionary and passes them to
    the corresponding pyplot functions.

    options: keyword args; recognized keys are 'title', 'xlabel',
             'ylabel', 'xscale', 'yscale', 'xticks', 'yticks', 'axis',
             'xlim', 'ylim', 'legend' and 'loc'.  Other keys are ignored.
    """
    global LEGEND, LOC

    names = ['title', 'xlabel', 'ylabel', 'xscale', 'yscale',
             'xticks', 'yticks', 'axis', 'xlim', 'ylim']

    # each recognized option is forwarded to the pyplot function of the
    # same name, e.g. title='x' calls pyplot.title('x')
    for name in names:
        if name in options:
            getattr(pyplot, name)(options[name])

    # 'legend' and 'loc' are sticky: they are stored in module globals so
    # later calls without them reuse the previous setting.  loc is passed
    # straight through; matplotlib understands text loc specs, so no
    # name-to-code table is needed.
    LEGEND = options.get('legend', LEGEND)

    if LEGEND:
        LOC = options.get('loc', LOC)
        pyplot.legend(loc=LOC)
|
||||
|
||||
|
||||
def Show(**options):
    """Configures the plot, displays it, and optionally clears the figure.

    For options, see Config.

    options: keyword args used to invoke various pyplot functions;
             'clf' (default True) controls whether Clf is called afterwards
    """
    clear_after = options.pop('clf', True)
    Config(**options)
    pyplot.show()
    if clear_after:
        Clf()
|
||||
|
||||
|
||||
def Plotly(**options):
    """Configures the plot and sends the current figure to plotly.

    For options, see Config.

    options: keyword args used to invoke various pyplot functions;
             'clf' (default True) controls whether Clf is called afterwards

    Returns: the value returned by plotly.plot_mpl
    """
    clear_after = options.pop('clf', True)
    Config(**options)

    # imported here so plotly is only needed when this function is called
    import plotly.plotly as plotly
    url = plotly.plot_mpl(pyplot.gcf())

    if clear_after:
        Clf()
    return url
|
||||
|
||||
|
||||
def Save(root=None, formats=None, **options):
    """Saves the plot in the given formats and clears the figure.

    For options, see Config.

    Args:
      root: string filename root; if falsy, no files are written
      formats: list of string formats (default ['pdf', 'eps']); the
               special entry 'plotly' sends the figure to Plotly instead
               of writing a file.  The caller's list is not modified.
      options: keyword args used to invoke various pyplot functions;
               'clf' (default True) controls whether Clf is called afterwards
    """
    clf = options.pop('clf', True)
    Config(**options)

    # Work on a copy: removing 'plotly' below must not alter the list the
    # caller passed in (it may be reused for the next figure).
    formats = ['pdf', 'eps'] if formats is None else list(formats)

    if 'plotly' in formats:
        formats.remove('plotly')
        Plotly(clf=False)

    if root:
        for fmt in formats:
            SaveFormat(root, fmt)

    if clf:
        Clf()
|
||||
|
||||
|
||||
def SaveFormat(root, fmt='eps'):
    """Saves the current figure as <root>.<fmt> at 300 dpi.

    Args:
      root: string filename root
      fmt: string format
    """
    target = '%s.%s' % (root, fmt)
    print('Writing', target)
    pyplot.savefig(target, format=fmt, dpi=300)
|
||||
|
||||
|
||||
# provide aliases for calling functions with lower-case names
preplot = PrePlot
subplot = SubPlot
clf = Clf
figure = Figure
plot = Plot
text = Text
scatter = Scatter
pmf = Pmf
pmfs = Pmfs
hist = Hist
hists = Hists
diff = Diff
cdf = Cdf
cdfs = Cdfs
contour = Contour
pcolor = Pcolor
config = Config
show = Show
save = Save
|
||||
|
||||
|
||||
def main():
    """Prints the seven colors produced by _Brewer.ColorGenerator(7)."""
    for shade in _Brewer.ColorGenerator(7):
        print(shade)
|
||||
|
||||
|
||||
# Run the color demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
2801
scipy/thinkstats2.py
Normal file
2801
scipy/thinkstats2.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user