-
Notifications
You must be signed in to change notification settings - Fork 0
/
NewMappings.Rd
165 lines (111 loc) · 5.92 KB
/
NewMappings.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
\name{PKGNAMElistNewMappings}
\alias{PKGNAMEARRAYADDRESS}
\alias{PKGNAMENUID}
\alias{PKGNAMEPROBESEQUENCE}
\alias{PKGNAMEPROBEQUALITY}
\alias{PKGNAMECODINGZONE}
\alias{PKGNAMEGENOMICLOCATION}
\alias{PKGNAMEGENOMICMATCHSIMILARITY}
\alias{PKGNAMESECONDMATCHES}
\alias{PKGNAMESECONDMATCHSIMILARITY}
\alias{PKGNAMETRANSCRIPTOMICMATCHSIMILARITY}
\alias{PKGNAMEOTHERGENOMICMATCHES}
\alias{PKGNAMEREPEATMASK}
\alias{PKGNAMEOVERLAPPINGSNP}
\alias{PKGNAMEENTREZREANNOTATED}
\alias{PKGNAMEENSEMBLREANNOTATED}
\alias{PKGNAMESYMBOLREANNOTATED}
\alias{PKGNAMElistNewMappings}
\alias{PKGNAMEfullReannotation}
\alias{PKGNAMEREPORTERGROUPNAME}
\alias{PKGNAMEREPORTERGROUPID}
\title{Custom mappings added to the package}
\description{
We have used an extensive re-annotation of the PKGNAME probe sequences to provide additional information that is not captured in the standard Bioconductor packages. Whereas Bioconductor annotations are based on the RefSeq ID that each probe maps to, our additional mappings provide data specific to each probe on the platform. See below for details. We recommend using the probe quality as a form of filtering, and retaining only perfect or good probes for an analysis.
}
\section{Details of custom mappings}{
\describe{
\item{PKGNAMElistNewMappings}{List all the custom re-annotation mappings provided by the package}
\item{PKGNAMEfullReannotation}{Return all the re-annotation information as a matrix}
\item{PKGNAMEARRAYADDRESS}{Array Address code used to identify the probe at the bead-level}
\item{PKGNAMENUID}{Lumi's nuID (universal naming scheme for oligonucleotides). Reference: Du et al. (2007), Biol Direct 2:16}
\item{PKGNAMEPROBESEQUENCE}{The 50 base sequence for the probe}
\item{PKGNAMEPROBEQUALITY}{Quality grade assigned to the probe: \dQuote{Perfect} if it perfectly and uniquely matches the target transcript; \dQuote{Good} if the probe, although imperfectly matching the target transcript, is still likely to provide considerably sensitive signal (up to two mismatches are allowed, based on empirical evidence that the signal intensity for 50-mer probes with less than 95\% identity to the respective targets is less than 50\% of the signal associated with perfect matches *); \dQuote{Bad} if the probe matches repeat sequences, intergenic or intronic regions, or is unlikely to provide specific signal for any transcript; \dQuote{No match} if it does not match any genomic region or transcript.}
\item{PKGNAMECODINGZONE}{Coding status of target sequence: intergenic / intronic / Transcriptomic (\dQuote{Transcriptomic} when the target transcript is non-coding or there is no information on the coding sequence)}
\item{PKGNAMEGENOMICLOCATION}{Probe's genomic coordinates (hg19 for human, mm9 for mouse or rn4 for rat)}
\item{PKGNAMEGENOMICMATCHSIMILARITY}{Percentage of similarity between the probe and its best genomic match in the alignable region, taking the probe as reference}
\item{PKGNAMESECONDMATCHES}{Genomic coordinates of second best matches between the probe and the genome}
\item{PKGNAMESECONDMATCHSIMILARITY}{Percentage of similarity between the probe and its second best genomic match in the alignable region, taking the probe as reference}
\item{PKGNAMETRANSCRIPTOMICMATCHSIMILARITY}{Percentage of similarity between the probe and its target transcript in the alignable region, taking the probe as reference}
\item{PKGNAMEOTHERGENOMICMATCHES}{Genomic coordinates of sequences that are as alignable with the probe (in terms of number of matching nucleotides) as its main target}
\item{PKGNAMEREPEATMASK}{Overlapping RepeatMasked sequences, with number of bases overlapped by the repeat}
\item{PKGNAMEOVERLAPPINGSNP}{Overlapping annotated SNPs}
\item{PKGNAMEENTREZREANNOTATED}{Entrez IDs}
\item{PKGNAMEENSEMBLREANNOTATED}{Ensembl IDs}
\item{PKGNAMESYMBOLREANNOTATED}{Gene symbol derived by re-annotation}
\item{PKGNAMEREPORTERGROUPID}{For probes marked as controls in Illumina's annotation file, this gives the type of control}
\item{PKGNAMEREPORTERGROUPNAME}{Usually a more informative name for the control type}
}
}
\references{
\url{http://remoat.sysbiol.cam.ac.uk}

Barbosa-Morais et al. (2010) A re-annotation pipeline for Illumina BeadArrays: improving the interpretation of gene expression data. Nucleic Acids Research.
}
\examples{
## See what new mappings are available
PKGNAMElistNewMappings()
## PROBEQUALITY: quality grade assigned to each probe
x <- PKGNAMEPROBEQUALITY
# Keep only the keys that mappedkeys() reports for this mapping
mapped_probes <- mappedkeys(x)
# Convert to a list
xx <- as.list(x[mapped_probes])
if(length(xx) > 0) {
# Get the PROBEQUALITY for the first five probes
xx[1:5]
# Get the first one
xx[[1]]
}
## Overall table of qualities
table(unlist(xx))
## ARRAYADDRESS: array address code for each probe
x <- PKGNAMEARRAYADDRESS
# Keep only the keys that mappedkeys() reports for this mapping
mapped_probes <- mappedkeys(x)
# Convert to a list
xx <- as.list(x[mapped_probes])
if(length(xx) > 0) {
# Get the ARRAYADDRESS for the first five probes
xx[1:5]
# Get the first one
xx[[1]]
}
## Can do the mapping from array address to Illumina ID using a revmap
y<- revmap(PKGNAMEARRAYADDRESS)
# After the revmap the keys are array addresses
mapped_probes <- mappedkeys(y)
# Convert to a list
yy <- as.list(y[mapped_probes])
if(length(yy) > 0) {
# Get the Illumina IDs for the first five array addresses
yy[1:5]
# Get the first one
yy[[1]]
}
## CODINGZONE: coding status of the target sequence
x <- PKGNAMECODINGZONE
# Keep only the keys that mappedkeys() reports for this mapping
mapped_probes <- mappedkeys(x)
# Convert to a list
xx <- as.list(x[mapped_probes])
if(length(xx) > 0) {
# Get the CODINGZONE for the first five probes
xx[1:5]
# Get the first one
xx[[1]]
}
## PROBESEQUENCE: the base sequence of each probe
x <- PKGNAMEPROBESEQUENCE
# Keep only the keys that mappedkeys() reports for this mapping
mapped_probes <- mappedkeys(x)
# Convert to a list
xx <- as.list(x[mapped_probes])
if(length(xx) > 0) {
# Get the PROBESEQUENCE for the first five probes
xx[1:5]
# Get the first one
xx[[1]]
}
}
\keyword{datasets}