Skip to content

Commit

Permalink
Merge pull request #19 from yli110-stat660/feature
Browse files Browse the repository at this point in the history
YL indentations update
  • Loading branch information
yli110-stat697 authored Sep 29, 2018
2 parents ea65668 + acada0f commit a776a67
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 84 deletions.
101 changes: 50 additions & 51 deletions STAT660-01_f18-team-2_project1_data_analysis_by_YL.sas
Original file line number Diff line number Diff line change
Expand Up @@ -58,35 +58,35 @@ Follow-up Steps: use a CLASS statement in PROC MEANS to get the summary
statistics for each employee
;
*report the mean and median of Absenteeism_time_in_hours over all rows of
absenteeism_analytic_file, printed with at most two decimal places (maxdec=2).
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc means
mean median maxdec=2
data = absenteeism_analytic_file
;
mean median maxdec=2
data = absenteeism_analytic_file
;
var
Absenteeism_time_in_hours
;
Absenteeism_time_in_hours
;
run;


*draw a histogram of Absenteeism_time_in_hours from absenteeism_analytic_file.
noprint suppresses the tables of descriptive statistics so only the plot is
produced, and the bare histogram statement applies to the variable named in var.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc univariate
noprint
data=absenteeism_analytic_file
;
noprint
data=absenteeism_analytic_file
;
var
Absenteeism_time_in_hours
;
Absenteeism_time_in_hours
;
histogram;
run;

*report the mean and median of Absenteeism_time_in_hours separately for each
value of id (the class variable), printed with at most two decimal places.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc means
mean median maxdec=2
data=absenteeism_analytic_file
;
mean median maxdec=2
data=absenteeism_analytic_file
;
class
id
;
id
;
var
Absenteeism_time_in_hours
;
Absenteeism_time_in_hours
;
run;

*clear any title lines that are still in effect;
title;
Expand Down Expand Up @@ -125,16 +125,16 @@ create a subset of the dataset, which only has the 36 employees with a binary
variable to indicate if he or she is ever absent.
;
*fit a logistic regression of absence on Work_load_Average_day using
Absenteeism_analytic_file.
NOTE(review): no EVENT= or DESCENDING option is given, so the procedure models
the lower ordered value of absence by default - confirm that this is the
intended response level before interpreting the sign of the coefficient.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc logistic
data=Absenteeism_analytic_file
;
model absence = Work_load_Average_day
;
data=Absenteeism_analytic_file
;
model absence = Work_load_Average_day
;
run;

*fit a linear model of absenteeism_time_in_hours on Work_load_Average_day.
the input dataset is named explicitly so that this step does not silently run
on whichever dataset happened to be created last in the session, and quit ends
the interactive procedure before the global title statement
;
proc glm
    data=Absenteeism_analytic_file
    ;
    model absenteeism_time_in_hours = Work_load_Average_day
    ;
run;
quit;

*clear any title lines that are still in effect;
title;
Expand Down Expand Up @@ -174,41 +174,40 @@ compare the histograms between workers -- data visualization often helps to
quickly identify the change.
;
*cross-tabulate id by reason_for_absence for all rows of
Absenteeism_analytic_file. nopercent, norow and nocol leave only the cell
counts in the table, and reason_for_absence is displayed through the
reasonofabsence. format.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc freq
data = Absenteeism_analytic_file
;
tables
id*reason_for_absence
/ nopercent norow nocol
;
format
reason_for_absence reasonofabsence.;
data = Absenteeism_analytic_file
;
tables
id*reason_for_absence
/ nopercent norow nocol
;
format
reason_for_absence reasonofabsence.;
run;

*cross-tabulate id by reason_for_absence, restricted by the where statement to
rows with absence equal to 1. nopercent, norow and nocol leave only the cell
counts in the table, and reason_for_absence is displayed through the
reasonofabsence. format.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc freq
data = Absenteeism_analytic_file
;
tables
id*reason_for_absence
/nopercent norow nocol
;
where
absence = 1
;
format
reason_for_absence reasonofabsence.;
data = Absenteeism_analytic_file
;
tables
id*reason_for_absence
/nopercent norow nocol
;
where
absence = 1
;
format
reason_for_absence reasonofabsence.;
run;

*draw a vertical bar chart with one bar per ID value, restricted by the where
statement to rows with absence equal to 1. no response variable is given, so
bar height is the number of qualifying rows for that ID.
NOTE(review): each statement is listed twice below, apparently the old and new
sides of an indentation-only change - confirm against the committed file
;
proc sgplot
data=absenteeism_analytic_file
;
vbar
ID
;
where
absence = 1;
data=absenteeism_analytic_file
;
vbar
ID
;
where
absence = 1;
run;
quit;

*clear any title and footnote lines that are still in effect;
title;
footnote;

66 changes: 33 additions & 33 deletions STAT660-01_f18-team-2_project1_data_preparation.sas
Original file line number Diff line number Diff line change
Expand Up @@ -29,36 +29,36 @@ downloaded from the UCI machine learning repository.
*create output formats;
*define the numeric format reasonofabsence, which maps the reason codes
0 through 28 to short descriptive labels. values outside that range are not
listed and therefore print unformatted
;
proc format;
    value reasonofabsence
        0 = 'NA'
        1 = 'Infectious and parasitic disease'
        2 = 'neoplasm'
        3 = 'blood disease'
        4 = 'endocrine disease'
        5 = 'mental and behaviour disorder'
        6 = 'nervous disease'
        7 = 'eye'
        8 = 'ear'
        9 = 'circulatory'
        10= 'respiratory'
        11= 'digestive'
        12= 'skin'
        13= 'muscle'
        14= 'genitourinary'
        15= 'pregnancy'
        16= 'perinatal'
        17= 'congenital'
        18= 'clinical'
        19= 'injury'
        20= 'morbidity and mortality'
        21= 'factors'
        22= 'followup'
        23= 'medical consultation'
        24= 'blood donation'
        25= 'lab'
        26= 'unjustified'
        27= 'physiotherapy'
        28= 'dental'
    ;
run;

*setup environmental parameters;
Expand Down Expand Up @@ -104,9 +104,9 @@ https://github.com/stat660/team-2_project1/blob/master/Absenteeism_at_work.xls?r
*check raw absenteeism_at_work dataset for duplicate records;
proc sort
noduprecs
data=Absenteeism_at_work_raw
dupout=Absenteeism_at_work_dups
out=Absenteeism_at_work_noduprecs
data=Absenteeism_at_work_raw
dupout=Absenteeism_at_work_dups
out=Absenteeism_at_work_noduprecs
;
by
id
Expand Down

0 comments on commit a776a67

Please sign in to comment.