-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgeneralized-additive-model.irnb
1 lines (1 loc) · 5.43 KB
/
generalized-additive-model.irnb
1
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "_uuid": "716ad23dbdba075512ea79252f622d14ae7b1264"
   },
   "source": [
    "# Generalized additive model\n",
    "\n",
    "Fits a binomial GAM with `mgcv::gam` on ten columns of `application_train.csv`, predicts `TARGET` for `application_test.csv`, and writes the predicted probabilities to `submission.csv`.\n",
    "\n",
    "Caveats:\n",
    "- Missing numeric values are replaced by the sentinel `-999` before the model is fitted, so every `s()` term also has to fit that sentinel. **TODO:** review whether a missingness indicator would be more appropriate.\n",
    "- The submission reuses the rows of `sample_submission.csv`; this assumes they are in the same order as the rows of `application_test.csv`. **TODO:** confirm."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "549d182fd759708276055b6a602409ead80dad9d"
   },
   "outputs": [],
   "source": [
    "# library() stops with an error if a package is missing; require() only warns and returns FALSE.\n",
    "library(tidyverse)\n",
    "library(mgcv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "0675c09d883e58a067e33e138abd112e07d6cf82"
   },
   "outputs": [],
   "source": [
    "train <- read_csv(\"../input/application_train.csv\")\n",
    "traindex <- seq_len(nrow(train))\n",
    "y <- train$TARGET\n",
    "\n",
    "# Indicator columns that are converted to factors.\n",
    "# FLAG_DOCUMENT_6 is not part of this list (it was not converted in the original code either).\n",
    "flag_cols <- c(\n",
    "  \"FLAG_MOBIL\", \"FLAG_WORK_PHONE\", \"FLAG_CONT_MOBILE\", \"FLAG_EMP_PHONE\",\n",
    "  \"FLAG_PHONE\", \"FLAG_EMAIL\",\n",
    "  \"REG_REGION_NOT_LIVE_REGION\", \"LIVE_REGION_NOT_WORK_REGION\",\n",
    "  \"REG_CITY_NOT_LIVE_CITY\", \"REG_CITY_NOT_WORK_CITY\",\n",
    "  paste0(\"FLAG_DOCUMENT_\", c(2:5, 7:21))\n",
    ")\n",
    "\n",
    "# Train and test are stacked so that both end up with identical factor levels.\n",
    "# Numeric NAs become -999, any remaining NAs become \"missing\", and character\n",
    "# columns are turned into factors. Plain functions are passed instead of the deprecated funs().\n",
    "df <- train %>%\n",
    "  select(-TARGET) %>%\n",
    "  bind_rows(read_csv(\"../input/application_test.csv\")) %>%\n",
    "  mutate_if(is.numeric, function(x) ifelse(is.na(x), -999, x)) %>%\n",
    "  replace(., is.na(.), \"missing\") %>%\n",
    "  mutate_if(is.character, as.factor) %>%\n",
    "  mutate_at(flag_cols, as.factor) %>%\n",
    "  select(-SK_ID_CURR)\n",
    "\n",
    "# Train and submission set\n",
    "X <- df[traindex, ]\n",
    "test <- df[-traindex, ]\n",
    "stopifnot(nrow(X) == length(y))\n",
    "\n",
    "rm(df, train)\n",
    "invisible(gc())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "e84c9436686e6f93fe3b88b5c3ad9e8268aeb3bd"
   },
   "outputs": [],
   "source": [
    "# Predictors used by the model.\n",
    "model_vars <- c(\"EXT_SOURCE_3\", \"EXT_SOURCE_2\", \"EXT_SOURCE_1\", \"DAYS_BIRTH\", \"AMT_CREDIT\",\n",
    "                \"AMT_ANNUITY\", \"DAYS_EMPLOYED\", \"CODE_GENDER\", \"OWN_CAR_AGE\", \"OCCUPATION_TYPE\")\n",
    "X <- X[model_vars]\n",
    "\n",
    "continuous_vars <- colnames(select_if(X, is.numeric))\n",
    "categorical_vars <- colnames(select_if(X, is.factor))\n",
    "\n",
    "# Numeric predictors enter as smooths s(x), factors as ordinary parametric terms.\n",
    "# The length guard is needed because paste0(\"s(\", character(0), \")\") yields \"s()\".\n",
    "smooth_terms <- if (length(continuous_vars) > 0) paste0(\"s(\", continuous_vars, \")\") else character(0)\n",
    "form <- reformulate(c(smooth_terms, categorical_vars), response = \"y\")\n",
    "form"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "083283725bb3e967302d03924a3d8e496660dc8a"
   },
   "outputs": [],
   "source": [
    "# `y` is not a column of X; it is looked up in the environment of `form` (the global workspace).\n",
    "gam1 <- gam(formula = form, data = X, family = binomial)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "e739ec7aa7e74f698ded57ae260009686e250dd9"
   },
   "outputs": [],
   "source": [
    "plot(gam1, pages = 1, residuals = TRUE, all.terms = TRUE, shade = TRUE, shade.col = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "e62a43a5eff2a7440f6f05fdd9122ad72329d936",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "summary(gam1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "c346459bed17141584ebd0e4e2b31cec4c72c3a6"
   },
   "outputs": [],
   "source": [
    "# predict.gam() defaults to type = \"link\" (log-odds for a binomial model);\n",
    "# type = \"response\" returns probabilities. as.numeric() drops the array attributes.\n",
    "pred <- as.numeric(predict(gam1, newdata = test, type = \"response\"))\n",
    "stopifnot(length(pred) == nrow(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "4c03ebbe50db807b40a926c841d32753d06374b7"
   },
   "outputs": [],
   "source": [
    "# NOTE(review): assumes sample_submission.csv lists rows in the same order as application_test.csv.\n",
    "submission <- read_csv(\"../input/sample_submission.csv\")\n",
    "stopifnot(nrow(submission) == length(pred))\n",
    "\n",
    "submission %>%\n",
    "  mutate(SK_ID_CURR = as.integer(SK_ID_CURR),\n",
    "         TARGET = pred) %>%\n",
    "  write_csv(\"submission.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "_uuid": "26cb5b6b5f7d92f232cb29a38a4e79dec876a512"
   },
   "outputs": [],
   "source": [
    "# R vectors are 1-indexed; head() shows the first five predictions.\n",
    "head(pred, 5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.4.2",
   "file_extension": ".r",
   "codemirror_mode": "r"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}