forked from gigablast/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
File.h
212 lines (155 loc) · 6.08 KB
/
File.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
// Matt Wells, Copyright May 2001
// . TODO: can closes block us? if we have many fds our closes might block!!
// . TODO: must we create a separate fd for each non-blocking read, even if
//   they are on the same file? not having to would save us fds...
// . this class simulates having 1K file descriptors.
// . by using its open/write/read/close it will make it seem like you have
//   5K file descriptors
// . minimizes the # of open/closes it does.
// On my solaris ultra 1 i could do 28,000 open/close pairs per second.
// my 400mhz pentium linux box was 2.5 times faster! it only had 256 file
// descriptors to work with, while the sun box had 1024.
// the sockets must share with these so we'd like to set a maximum for each.
#ifndef _FILE_H_
#define _FILE_H_
#define MAX_FILENAME_LEN 128
// . max # of VIRTUAL file descriptors
// . man, chris has 958 files, lets crank it up from 2k to 5k
// . boost up to 50,000 since we are hitting this limit with crawlbot
// . we are hitting again with crawlbot, boost to 200k from 50k
// . TODO: make this dynamically allocate based on need
//#define MAX_NUM_VFDS (1024*1024)
#include <sys/types.h> // for open/lseek
#include <sys/stat.h> // for open
#include <fcntl.h> // for open
#include <sys/stat.h> // for stat
#include "Mem.h" // for g_mem
#include "Loop.h" // for g_loop.setNonBlocking(int fd)
#include "SafeBuf.h"
// Free-standing file helpers. Only the prototypes live in this header; the
// definitions are elsewhere.
// NOTE(review): the return conventions of the filename-based helpers are not
// visible here -- presumably they mirror File::doesExist() and
// File::getFileSize(); confirm against the definitions.
bool doesFileExist ( char *filename ) ;
int64_t getFileSize ( char *filename ) ;
// NOTE(review): presumably a Cygwin-specific variant of getFileSize() -- confirm
int64_t getFileSize_cygwin ( char *filename ) ;
// for avoiding unlink/opens that mess up our threaded read
// NOTE(review): the _r suffix presumably marks this as callable from a
// thread -- confirm
int32_t getCloseCount_r ( int fd );
// prevent fd from being closed on us when we are writing
// NOTE(review): presumably a write is bracketed by enterWriteMode(fd) before
// and exitWriteMode(fd) after -- confirm against callers
void enterWriteMode ( int fd ) ;
void exitWriteMode ( int fd ) ;
// error correction routine used by BigFile.cpp
//void releaseVfd ( int32_t vfd ) ;
//int getfdFromVfd ( int32_t vfd ) ;
// File: wrapper around a single on-disk file and its OS file descriptor.
// . holds the filename (m_filename), the real fd (m_fd, -1 when not open) and
//   the flags passed to open() (m_flags)
// . per the comments below, getfd() may re-open the file on demand and
//   closeLeastUsed() frees the least-used file; the implementation of both
//   is outside this header
// . BigFile is a friend and also passes File objects to threads for
//   unlink/rename (see m_this / m_i)
// . NOTE: "private:" is commented out below, so every member is public
class File {
friend class BigFile;
public:
// along the same lines as getCloseCount_r()
//void incCloseCount_r ( ) ;
File ( );
~File ( );
// NOTE(review): presumably these let a caller initialize / tear down a File
// living in raw memory without running the C++ ctor/dtor -- confirm in the
// implementation
void constructor();
void destructor ();
// . if you don't need to do a full open then just set the filename
// . useful for unlink/rename/reserve/...
// . IMPORTANT: if bytes were already reserved can only increase the
// reserve, not decrease
// . two forms: directory + filename, or a single filename
// . NOTE(review): m_filename holds at most MAX_FILENAME_LEN bytes; how an
//   over-long name is handled is not visible in this header
void set ( char *dir , char *filename );
void set ( char *filename );
// returns false and sets errno on error, returns true on success
bool rename ( char *newFilename );
// accessors for the m_calledOpen / m_calledSet flags (stored as char,
// returned as bool)
bool calledOpen () { return m_calledOpen; };
bool calledSet () { return m_calledSet; };
// NOTE(review): presumably reports whether the file was opened with a
// non-blocking flag in m_flags -- confirm in the implementation
bool isNonBlocking () ;
// . get the file extension of this file
// . return NULL if none
char *getExtension ( ) ;
// uses lseek to get file's current position
// (returned as a signed 32-bit value)
int32_t getCurrentPos ( ) ;
// . open() returns true on success, false on failure, errno is set.
// . opens for reading/writing only
// . returns false if does not exist
// . "permissions" defaults to read/write for user and group plus read for
//   others (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)
bool open ( int flags , int permissions =
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
// . use an offset of -1 to use current file seek position
// . returns what ::read returns
// . returns -1 on lseek failure (if offset is valid)
// . returns 0 on EOF
// . returns numBytesRead if not error
// . a negative offset means current read offset
// . "size" and "offset" are signed 32-bit values
int read ( void *buf , int32_t size , int32_t offset );
// . use an offset of -1 to use current file seek position
// . returns what ::write returns
// . returns -1 on lseek failure (if offset is valid)
// . returns numBytesWritten if not error
// . this is non-blocking so may return < "numBytesToWrite"
//   ("numBytesToWrite" is the "size" parameter)
// . a negative offset means current write offset
int write ( void *buf , int32_t size , int32_t offset );
// . this will really close this file
bool close ( );
// . flush the output
bool flush ( );
// used by threaded unlinks and renames by BigFile.cpp
// NOTE(review): presumably close1_r() runs inside the thread, sets
// m_closedIt, and close2() finishes up afterwards -- confirm in the
// implementation
bool m_closedIt;
void close1_r ();
void close2 ();
// . returns -1 on error
// . otherwise returns file size in bytes
// . returns 0 if does not exist
int64_t getFileSize ( );
// . when was it last touched?
time_t getLastModifiedTime ( );
// . returns -1 on error and sets errno
// . returns 0 if does not exist
// . returns 1 if it exists
// . a simple stat check
int32_t doesExist ( );
// . returns false and sets errno on error
// . NOTE(review): an older comment here called this "static so you don't
//   need an instance of this class to call it", but it is declared as a
//   non-static member; it presumably unlinks the file named by m_filename
bool unlink ( );
// . file position seeking -- just a wrapper for lseek
// . returns -1 on error
// . used by reserve/write/read/getFileSize()
// . "whence" defaults to SEEK_SET, i.e. "offset" is absolute unless the
//   caller says otherwise
int32_t lseek ( int32_t offset , int whence = SEEK_SET );
// . interface so BigFile and others can access the static member info
//char *getName ( ) ;
//int getMode ( ) ;
//int getPermissions ( ) ;
// . will try to REopen the file to get the fd if necessary
// . used by BigFile
// . returns -2 if we've never been officially opened
// . returns -1 on error getting the fd or opening this file
// . must call open() before calling this
int getfd ( ) ;
// return -1 if not opened, otherwise, return the opened fd
int getfdNoOpen ( ) ;
//char *getFilename ( ) { return m_filename.getBufStart(); };
// returns a pointer to the internal m_filename buffer (not a copy)
char *getFilename ( ) { return m_filename; };
// our filename, stored in a fixed-size buffer of MAX_FILENAME_LEN bytes
// (an older comment said "allocated with strdup"; that no longer matches
// the declaration)
// we publicize for ease of use
char m_filename [ MAX_FILENAME_LEN ];
//SafeBuf m_filename;
//char m_filenameBuf [ MAX_FILENAME_LEN ];
// File::rename() uses this
//char m_oldFilename [ MAX_FILENAME_LEN ];
// BigFile uses these when passing us to a thread for unlink/rename
// so it can store its THIS ptr and the i in BigFile::m_files[i]
void *m_this;
int32_t m_i;
// NOTE(review): presumably tied to getCloseCount_r() -- confirm
int32_t m_closeCount;
// private:
// (the access specifier above is commented out, so what follows is public)
// initializes the fd pool
bool initialize ();
// free the least-used file.
bool closeLeastUsed ( );
// THIS file's VIRTUAL descriptor
//int m_vfd;
// now just the real fd. is -1 if not opened
int m_fd;
// save the flag set in case of re-opening
// (the matching m_permissions member is commented out below)
int m_flags;
//int m_permissions;
// flags returned by calledOpen() / calledSet()
char m_calledOpen;
char m_calledSet;
// NOTE(review): m_st_size is 32-bit while getFileSize() returns int64_t;
// confirm how (or whether) these cached fields are still used
time_t m_st_mtime; // file last mod date
int32_t m_st_size; // file size
// NOTE(review): name overlaps with getLastModifiedTime() above; confirm
// which one callers actually use
time_t getLastModifiedDate ( ) ;
//class File *m_nextActive;
//class File *m_prevActive;
};
#endif