-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathmethyLiftover.R
94 lines (80 loc) · 5.22 KB
/
methyLiftover.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
######################################################################
# methyLiftover - A utility to map whole-genome & reduced representation bisulfite data by genomic position to the Illumina
# 450k Methylation Array.
# Authors: Alexander J. Titus, E. Andres Houseman, Kevin C. Johnson, Brock C. Christensen
######################################################################
######################################################################
# Install and load dependency packages
######################################################################
setUpWorkSpace <- function() {
# Load the packages needed for the processing
if("data.table" %in% rownames(installed.packages()) == FALSE) {install.packages('data.table')}
if("IlluminaHumanMethylation450kanno.ilmn12.hg19" %in% rownames(installed.packages()) == FALSE) {
biocLite("IlluminaHumanMethylation450kanno.ilmn12.hg19")}
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(data.table)
}
######################################################################
# liftover450k: Liftover genomic data to the 450k probset CpG set.
######################################################################
# @param fileDir absolute directory path to the input file, including file name
# @param fileKeys vector containing two column names (class character) in order of c(chromosomeName, genomicPosition) EX/ c('chr', 'pos')
# @param fileSkipLine number of lines to skip before starting input. The default of 1 runs a standard data.table import skipping BED file headers
# @param outputfileDir the directory to save the liftover RData file
# @param outputFileName the name of the resulting output file of the liftover WITHOUT file extension
liftover450k <- function(fileDir, fileKeys, fileSkipLine = 1, outputFileDir, outputFileName){
# Load the Illumina 450k Methylation Array for merging
illumData <- IlluminaHumanMethylation450kanno.ilmn12.hg19@data
illumDataFrame <- illumData$Locations
illumDataFrame <- cbind(illumDataFrame, illumData$Islands.UCSC )
illumDataFrame$Type <- illumData$Manifest$Type
illumDataFrame$probe <- rownames(illumDataFrame)
illumDataTable <- data.table(as.data.frame(illumDataFrame))
inputFile <- fread(fileDir, skip = fileSkipLine)
WGBSliftover <- merge(illumDataTable, inputFile, by.x=c('chr', 'pos'), by.y=fileKeys)
rownames(WGBSliftover) <- WGBSliftover$probe
setwd(outputFileDir)
fileName <- paste(outputFileName, '.RData', sep='')
save(WGBSliftover, file = fileName)
}
######################################################################
# liftoverUserFiles: Liftover genomic data and merge two datasets.
######################################################################
# @param fileDir1 absolute directory path to file 1, including file name
# @param fileDir2 absolute directory path to file 1, including file name
# @param file1Keys vector containing two column names (class character) in order of c(chromosomeName, genomicPosition) EX/ c('chr', 'pos')
# @param file2Keys vector containing two column names (class character) in order of c(chromosomeName, genomicPosition) EX/ c('chr', 'pos')
# @param file1SkipLine number of lines to skip before starting file 1 import The default of 1 runs a standard data.table import skipping the BED header row
# @param file2SkipLine number of lines to skip before starting file 2 import The default of 1 runs a standard data.table import skipping the BED header row
# @param outputfileDir the directory to save the liftover RData file
# @param outputFileName the name of the resulting output file of the liftover WITHOUT file extension
liftoverUserFiles <- function(fileDir1, fileDir2, file1Keys, file2Keys, file1SkipLine = 1, file2SkipLine = 1, outputFileDir, outputFileName){
# Read in our files
file1 <- fread(fileDir1, skip=file1SkipLine)
file2 <- fread(fileDir2, skip=file2SkipLine)
# Create our merged/mapped file
liftOver <- merge(file1, file2, by.x=file1Keys, by.y=file2Keys)
# Save the output file
setwd(outputFileDir)
fileName <- paste(outputFileName, '.RData', sep='')
save(liftOver, file = fileName)
}
######################################################################
# Liftover genomic data from files within a directory to the 450k probset CpG set.
######################################################################
# @param fileDir directory where files are stored
# @param fileKeys vector containing two column names (class character) in order of c(chromosomeName, genomicPosition) EX/ c('chr', 'pos')
# @param fileSkipLine number of lines to skip before starting input. The default of 1 runs a standard data.table import skipping the BED header row
# @param outputfileDir the directory to save the liftover RData file
# @param outputFileName the name of the resulting output file of the liftover WITHOUT file extension
liftover450kset <- function(fileDir, fileKeys, fileSkipLine = 1, outputFileDir, outputFileName){
# Set the directory where the BED files are stored
setwd(fileDir)
# List all BED files in a directory
files <- list.files()
for(i in 1:length(files)){
outputFileNameUse <- paste(outputFileName, i, sep='_')
fileDirUse <- paste(fileDir, files[i], sep='/')
liftover450k(fileDirUse, fileKeys, fileSkipLine, outputFileDir, outputFileNameUse)
}
}