** RESOURCES FOR STATA
** https://www.stata.com/links/resources-for-learning-stata/


** set working folder
** NOTE: the path below is a placeholder - replace it with the real folder.
** There must be no blanks just inside the quotes, otherwise they become
** part of the folder name.
cd "C:\...\EDYI_slab2"

** start log file
** first close any log left open by an earlier (possibly interrupted) run;
** -capture- hides the error raised when no log is open
capture log close
log using lab2.log, replace

******************************
*
* descriptive statistics
*
******************************
* read the raw text file, replacing whatever data are in memory
insheet using press.txt, clear

* --- continuous variables ---

* overview of every variable in the dataset
codebook

* basic summary of age
summarize age

* extended summary of pressure
summarize pressure, detail

* --- categorical variables ---

* one-way frequency table
tabulate gender

* the same counts produced with -table-
table gender


******************************
* analysis by groups
******************************

* group-specific summaries using an -if- qualifier
summarize height if gender=="M"
summarize height if gender=="F"

* -by- requires the data to be sorted by the grouping variable.
* The data have not been sorted yet, so this command fails with "not sorted".
* -capture noisily- displays that error without aborting the do-file.
capture noisily by gender: summarize height

* sort first, then -by- works
sort gender
by gender: summarize weight
* one-step alternative: the sort option sorts before running the command
by gender, sort: summarize weight

* tables of summary statistics by group
* NOTE(review): contents() is the -table- syntax of Stata 16 and earlier;
* newer releases appear to need version control or the statistic() option
* - confirm against the Stata release used in the lab.
table gender, contents(mean height)
table gender, contents(mean height sd height min height max height)


******************************
*
* missing values in Stata
*
******************************
insheet using press2.txt, clear

* frequency table without and with the missing category
tabulate byear
tabulate byear, missing

summarize height weight
tabulate height, missing
tabulate weight, missing

** recode a numeric code as missing
* option 1: -replace- with an -if- condition
replace height=. if height==999
tabulate height, missing

* option 2: -mvdecode-
mvdecode weight, mv(-1)
tabulate weight, missing

* this condition is also true for missing heights
count if height>170
list id gender height if height>170

* same selection with missing heights excluded
count if height>170 & height!=.
list id gender height if height>170 & height!=.

** recode missing back to a numeric code
mvencode weight, mv(-1)
mvencode height, mv(999)

list



******************************
*
*   data with dates
*
******************************

use date, clear
describe
list

** create date variable from a string
generate newdate1=date(date1, "DMY")
* before a date format is applied the value is listed as a plain number
list id date1 newdate1
format newdate1 %td
list id date1 newdate1

** create date variable from string with 2-digit year
* first attempt with the plain "DMY" mask; the result is listed for
* inspection and then discarded
generate newdate2=date(date2, "DMY")
list id date2 newdate2
drop newdate2
* "19Y" tells date() to read 2-digit years as 19xx
generate newdate2=date(date2, "DM19Y")
format newdate2 %td
list id date2 newdate2

** extract day, month, year, week day from a date variable
* use the full name newdate1: the bare name "newdate" is ambiguous between
* newdate1 and newdate2
generate dd=day(newdate1)
generate mm=month(newdate1)
generate yy=year(newdate1)
generate weekday=dow(newdate1)
list id date1 newdate1 dd mm yy weekday

** create date from day-month-year
* note the argument order of mdy(): month, day, year
generate date=mdy(mm, dd, yy)
format date %td
list id date1 dd mm yy date



******************************
*
*    GRAPHS
*
******************************
use data_all, clear

** pie chart
***************************************
* NOTE(review): the encode step below is commented out, so the by(sex)
* graphs rely on sex already being a variable in data_all - confirm.
* encode gender, gen(sex)

* inspect the categories including missing, then set code 9 to missing
tab1 alcohol coffee, missing
mvdecode alcohol coffee, mv(9)

* one pie for the whole sample
graph pie, over(alcohol)

* one pie per sex, slices labelled with sums
graph pie, over(alcohol) by(sex) plabel(_all sum)

* one pie per sex, slices labelled with percentages
graph pie, over(alcohol) by(sex) plabel(_all percent)

* export the graph currently displayed
graph export pie_alc.emf, replace


** bar chart
******************************
* counts per alcohol category, for comparison with the bars
table alcohol

* bars showing counts, labelled with the bar value
graph bar (count), over(alcohol) blabel(bar)

* bars with the default statistic, labels shown with one decimal
graph bar, over(alcohol) blabel(bar, format(%4.1f))


**   histogram
******************************
histogram pressure

sort gender

* 5 bins, normal density overlaid, one panel per gender
histogram pressure, bin(5) normal by(gender)

** save graph in metafile format (.emf)
graph export hist_pres.emf, replace


**   boxplot
******************************
* single box for the whole sample
graph box pressure

* separate panel for each alcohol category
graph box pressure, by(alcohol)

* boxes side by side within one plot
graph box pressure, over(alcohol)

* boxes grouped by alcohol and by gender
graph box pressure, over(alcohol) over(gender)

graph export box_pres.emf, replace


* scatterplot
******************************
summarize weight height pressure
* set out-of-range values to missing before plotting
replace weight=. if weight<0
replace height=. if height>300

* weight (y axis) against height (x axis)
twoway scatter weight height

* scatterplot matrix of all three variables
graph matrix weight height pressure


******************************
*
*    combine graphs
*
******************************

* save each graph to disk as a .gph file so the two can be combined
graph box pressure, over(alcohol) saving(press_alc, replace)

histogram pressure, bin(5) normal by(gender) saving(press_hist, replace)

* quoted names refer to the saved .gph files
graph combine "press_alc" "press_hist"

** close the log file opened at the start of the script
log close



