import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Copyright 2009 Rapleaf, Inc.
 * Cesar Delgado
 * cesar AT rapleaf.com
 * License: Apache License 2.0
 */
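
/**
 * Incremental backup of HDFS files to the local filesystem. Walks the given
 * HDFS directory tree, collects every file modified since the last backup,
 * sorts the files oldest first, and copies them until the optional size
 * limit is hit. The timestamp printed on stdout is the value to pass as the
 * unix-time argument on the next run.
 *
 * Illustrative invocation (paths, timestamp, and size are examples only):
 *
 *   hadoop Backup /data /mnt/backup 1230768000 1073741824
 *
 * copies files under /data modified since 1230768000 (UNIX seconds) into
 * /mnt/backup, stopping after roughly 1 GiB.
 */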
public class Backup {

  /**
   * Prints usage information and exits.
   */
  static void usage() {
    System.err.println("Usage: hadoop Backup <path on hdfs> <path on local fs> <unix time> [<max size to backup in bytes>]");
    System.exit(1);
  }
  public static void main(String[] argv) throws IOException {
    if (argv.length < 3)
      usage();

    Path baseDir = new Path(argv[0]);            // HDFS path to back up
    String localPath = argv[1];                  // Local destination path
    long minDate = Long.parseLong(argv[2]);      // UNIX time (seconds) of the last backup
    long maxDate = new Date().getTime() / 1000;  // UNIX time (seconds) for right now
    long tmpDate = 0;
    long size = 0;
    if (argv.length == 4)
      size = Long.parseLong(argv[3]);            // Optional maximum size to back up, in bytes

    Backup bak = new Backup();
    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (fs.getFileStatus(baseDir).isDir()) {  // Only proceed if the HDFS path is a directory
      System.err.println("Backing up: " + baseDir.toUri().getPath());
      bak.checkDir(fs, minDate, maxDate, baseDir, pathList);
      System.err.println("Number of files searched = " + pathList.size());
      Collections.sort(pathList, new DateComparator(fs));
      tmpDate = bak.backupFiles(localPath, fs, pathList, size);
    }

    if (tmpDate == 0) {  // Size limit not reached: print the timestamp for right now
      System.out.println(maxDate);
    } else {             // Size limit reached: print the timestamp of the last file backed up
      System.err.println("Size limit reached.");
      System.out.println(tmpDate);
    }
  }
  /**
   * Orders paths by HDFS modification time, oldest first, so that a
   * size-limited run backs up the oldest files and can resume cleanly
   * from the printed timestamp.
   */
  static class DateComparator implements Comparator<Path> {
    FileSystem fs;

    public DateComparator(FileSystem f) {
      fs = f;
    }

    public int compare(Path path1, Path path2) {
      try {
        long date1 = fs.getFileStatus(path1).getModificationTime();
        long date2 = fs.getFileStatus(path2).getModificationTime();
        if (date1 > date2)
          return 1;
        else if (date1 < date2)
          return -1;
        else
          return 0;
      } catch (IOException e) {
        System.err.println("Something went wrong when trying to compare dates");
        System.err.println(e);
        System.exit(1);
        return 0;  // Not reached: System.exit() terminates first, but the compiler requires a return
      }
    }
  }
  public Backup() {
  }
  /**
   * Copies files from HDFS to the local filesystem, in the order given,
   * until the optional size limit is reached.
   *
   * @param localPath path on the local machine's filesystem
   * @param fs        HDFS FileSystem object
   * @param pathList  files that may need to be backed up
   * @param size      maximum number of bytes to back up; 0 means no limit
   * @return modification time (UNIX seconds) of the last file backed up if
   *         the size limit was reached, otherwise zero
   */
  public long backupFiles(String localPath, FileSystem fs, ArrayList<Path> pathList, long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Iterate over all candidate paths, oldest first
    for (Path hdfsPath : pathList) {
      try {
        tmpSize = tmpSize + fs.getContentSummary(hdfsPath).getLength();
        if ((tmpSize <= size) || (size == 0)) {
          tmpDate = fs.getFileStatus(hdfsPath).getModificationTime() / 1000;  // ms -> UNIX seconds
          System.err.print("File: " + hdfsPath.toUri().getPath());
          // Mirror the HDFS path under the local destination
          fsPath = new Path(localPath + hdfsPath.toUri().getPath());
          fs.copyToLocalFile(hdfsPath, fsPath);
        } else {
          return tmpDate;  // Size limit reached; report the last file copied
        }
        System.err.println(" Done");
      } catch (IOException e) {
        System.err.println("Something went wrong with the file");
        System.err.println(e);
        System.exit(1);
        return 0;
      }
    }
    return 0;
  }
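
  // Illustrative example of the path mapping in backupFiles (paths are
  // hypothetical): with localPath "/mnt/backup", the HDFS file
  // /logs/2009/01/access.log is copied to
  // /mnt/backup/logs/2009/01/access.log, so the local tree mirrors HDFS.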
  /**
   * Walks the HDFS filesystem depth-first, collecting every file whose
   * modification time falls inside the backup window.
   *
   * @param fs       HDFS FileSystem object
   * @param minDate  oldest modification time (UNIX seconds, inclusive) to back up
   * @param maxDate  newest modification time (UNIX seconds, exclusive) to back up
   * @param p        HDFS path to search
   * @param pathList filled with all matching files under p
   */
  public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList) {
    long tmpDate;
    FileStatus[] fStat;

    try {
      if (fs.getFileStatus(p).isDir()) {  // Recurse into every entry of a directory
        fStat = fs.listStatus(p);
        for (int i = 0; i < fStat.length; i++) {
          checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList);
        }
      } else {  // Otherwise we've found a file; keep it if it falls in the window
        tmpDate = fs.getFileStatus(p).getModificationTime() / 1000;  // ms -> UNIX seconds
        if ((minDate <= tmpDate) && (maxDate > tmpDate)) {
          pathList.add(p);
        }
      }
    } catch (IOException e) {
      System.err.println("Could not open " + p);
      System.err.println(e);
      System.exit(1);
    }
  }
}
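
// A minimal sketch of how successive runs can be chained, assuming the
// timestamp printed on stdout is saved between runs (file locations are
// hypothetical):
//
//   LAST=$(cat /var/backup/last_run)   # timestamp printed by the previous run
//   hadoop Backup /data /mnt/backup "$LAST" > /var/backup/last_run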