From c8af72418dca10a36a28022edf74b957f5aa19cc Mon Sep 17 00:00:00 2001 From: Will Milton Date: Fri, 26 Jul 2019 16:49:33 +0000 Subject: [PATCH 01/21] pass content sha512 ... so that the server can redirect us without needing to read the data. --- src/filecache.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index 2dd1bd1e..d9c53834 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1078,15 +1078,40 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) CURL *session; struct curl_slist *slist = NULL; FILE *fp; + GChecksum *checksum; fp = fdopen(dup(fd), "r"); if (!fp) { g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); - goto finish; + goto ifinish; } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); - goto finish; + goto ifinish; + } + + int num_read, is_eof, is_ferror; + guchar fbytes[1024]; + num_read = is_eof = is_ferror = 0; + checksum = g_checksum_new(G_CHECKSUM_SHA512); + while (is_eof == 0) { + num_read = fread(fbytes, sizeof(fbytes), 1, fp); + if (num_read == 0) { + if ((is_ferror = ferror(fp)) != 0) { + g_set_error(gerr, system_quark(), errno, "%s: fread for checksum error on path %s", funcname, path); + goto ifinish; + } + if ((is_eof = feof(fp)) == 0) { + g_set_error(gerr, system_quark(), errno, "%s: fread for checksum no eof short read on path %s", funcname, path); + goto ifinish; + } + } + g_checksum_update(checksum, fbytes, sizeof(fbytes)); + } + + if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { + g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); + goto ifinish; } // REVIEW: We didn't use to check for sesssion == NULL, so now we @@ -1096,7 +1121,7 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) g_set_error(gerr, curl_quark(), E_FC_CURLERR, "%s: Failed session_request_init on PUT", funcname); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); - goto finish; + goto ifinish; } curl_easy_setopt(session, CURLOPT_CUSTOMREQUEST, "PUT"); @@ -1104,6 +1129,11 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) curl_easy_setopt(session, CURLOPT_INFILESIZE, st.st_size); curl_easy_setopt(session, CURLOPT_READDATA, (void *) fp); + char* sha512_header; + asprintf(&sha512_header, "If-None-Match: %s", g_checksum_get_string); + slist = curl_slist_append(slist, sha512_header); + free(sha512_header); + slist = enhanced_logging(slist, LOG_DYNAMIC, SECTION_FILECACHE_COMM, "put_return_tag: %s", path); if (slist) curl_easy_setopt(session, CURLOPT_HTTPHEADER, slist); @@ -1114,15 +1144,21 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) timed_curl_easy_perform(session, &res, &response_code, &elapsed_time); - fclose(fp); - if (slist) curl_slist_free_all(slist); bool non_retriable_error = process_status("put", session, res, response_code, elapsed_time, idx, path, false); // Some errors should not be retried. 
(Non-errors will fail the // for loop test and fall through naturally) if (non_retriable_error) break; - } + + ifinish: + // close only if we successfully opened + if (fp) { + fclose(fp); + } + g_checksum_free(checksum); + goto finish; + } // end for loop if ((res != CURLE_OK || response_code >= 500) || inject_error(filecache_error_etagcurl1)) { trigger_saint_event(CLUSTER_FAILURE, "put"); @@ -1238,7 +1274,6 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) log_print(LOG_DEBUG, SECTION_FILECACHE_COMM, "PUT returns etag: %s", etag); finish: - log_print(LOG_DEBUG, SECTION_FILECACHE_FLOCK, "%s: releasing exclusive file lock on fd %d", funcname, fd); if (flock(fd, LOCK_UN) || inject_error(filecache_error_etagflock2)) { g_set_error(gerr, system_quark(), errno, "%s: error releasing exclusive file lock", funcname); From 2a7c524b371a7273ad97167bd66b5243b4f96f4a Mon Sep 17 00:00:00 2001 From: Jerry Blakley Date: Thu, 20 Jun 2019 20:17:47 +0000 Subject: [PATCH 02/21] More saint mode stats (#493) (#494) --- src/filecache.c | 32 +++++++++++++++++++++++--------- src/filecache.h | 3 ++- src/fusedav.c | 45 ++++++++++++++++++++++----------------------- src/props.c | 3 ++- src/session.c | 10 ++++------ src/session.h | 4 +++- 6 files changed, 56 insertions(+), 41 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index fc466620..2dd1bd1e 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -350,7 +350,7 @@ static size_t write_response_to_fd(void *ptr, size_t size, size_t nmemb, void *u // Get a file descriptor pointing to the latest full copy of the file. static void get_fresh_fd(filecache_t *cache, const char *cache_path, const char *path, struct filecache_sdata *sdata, - struct filecache_pdata **pdatap, int flags, bool use_local_copy, GError **gerr) { + struct filecache_pdata **pdatap, int flags, bool use_local_copy, bool rw, GError **gerr) { static const char *funcname = "get_fresh_fd"; GError *tmpgerr = NULL; struct filecache_pdata *pdata; @@ -400,20 +400,26 @@ static void get_fresh_fd(filecache_t *cache, g_set_error(gerr, system_quark(), errno, "%s: open failed: %s", funcname, strerror(errno)); // use_local_copy means we're in saint mode // otherwise we failed on something we expected based on last_server_update + // I believe we can get here on a PUT, and succeed in getting a previous copy from the cache + // This will eventually fail if we are already in saint mode, but should succeed for now if (use_local_copy) { - stats_counter("get_saint_mode_failure", 1, samplerate); + if (rw ) stats_counter("put_saint_mode_failure", 1, samplerate); + else stats_counter("get_saint_mode_failure", 1, samplerate); log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: get_saint_mode_failure on file: %s::%s", funcname, path, pdata->filename); } else { - stats_counter("get_cache_failure", 1, samplerate); + if (rw) stats_counter("put_cache_failure", 1, samplerate); + else stats_counter("get_cache_failure", 1, samplerate); log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: get_cache_failure on file in cache: %s::%s", funcname, path, pdata->filename); } goto finish; } else { if (use_local_copy) { - stats_counter("get_saint_mode_success", 1, samplerate); + if (rw) stats_counter("put_saint_mode_success", 1, samplerate); + else stats_counter("get_saint_mode_success", 1, samplerate); log_print(LOG_NOTICE, SECTION_FILECACHE_OPEN, "%s: get_saint_mode_success on file: %s::%s", funcname, path, pdata->filename); } else { - stats_counter("get_cache_success", 1, samplerate); + if (rw) 
stats_counter("put_cache_success", 1, samplerate); + else stats_counter("get_cache_success", 1, samplerate); log_print(LOG_INFO, SECTION_FILECACHE_OPEN, "%s: get_cache_success on file in cache: %s::%s", funcname, path, pdata->filename); } } @@ -474,12 +480,16 @@ static void get_fresh_fd(filecache_t *cache, long elapsed_time = 0; CURL *session; struct curl_slist *slist = NULL; + rwp_t rwp; + + if (rw) rwp = WRITE; + else rwp = READ; // These will be -1 and [0] = '\0' on idx 0; but subsequent iterations we need to clean up from previous time if (response_fd >= 0) close(response_fd); if (response_filename[0] != '\0') unlink(response_filename); - session = session_request_init(path, NULL, false, false); + session = session_request_init(path, NULL, false, rwp); if (!session || inject_error(filecache_error_freshsession)) { g_set_error(gerr, curl_quark(), E_FC_CURLERR, "%s: Failed session_request_init on GET", funcname); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. @@ -791,7 +801,8 @@ static void get_fresh_fd(filecache_t *cache, } // top-level open call -void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info, bool grace, GError **gerr) { +void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info, + bool grace, bool rw, GError **gerr) { struct filecache_pdata *pdata = NULL; struct filecache_sdata *sdata = NULL; GError *tmpgerr = NULL; @@ -855,7 +866,7 @@ void filecache_open(char *cache_path, filecache_t *cache, const char *path, stru // Get a file descriptor pointing to a guaranteed-fresh file. log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "filecache_open: calling get_fresh_fd on %s", path); - get_fresh_fd(cache, cache_path, path, sdata, &pdata, flags, use_local_copy, &tmpgerr); + get_fresh_fd(cache, cache_path, path, sdata, &pdata, flags, use_local_copy, rw, &tmpgerr); if (tmpgerr) { // If we got a network error (curl_quark is a marker) and we // are using grace, try again but use the local copy @@ -1033,6 +1044,9 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) static const unsigned small_time_allotment = 4000; // 4 seconds static const unsigned large_time_allotment = 8000; // 8 seconds const float samplerate = 1.0; // always sample stats + // If in saint mode and we count this request as a saint_write in get_fresh_fd(), we would be + // double counting; but I don't see how we get here at all if we detect saint mode in get_fresh_fd() + rwp_t rwp = WRITE; BUMP(filecache_return_etag); @@ -1077,7 +1091,7 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) // REVIEW: We didn't use to check for sesssion == NULL, so now we // also call try_release_request_outstanding. Is this OK? - session = session_request_init(path, NULL, false, true); + session = session_request_init(path, NULL, false, rwp); if (!session || inject_error(filecache_error_freshsession)) { g_set_error(gerr, curl_quark(), E_FC_CURLERR, "%s: Failed session_request_init on PUT", funcname); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. 
diff --git a/src/filecache.h b/src/filecache.h index 1ff533f1..b8425070 100644 --- a/src/filecache.h +++ b/src/filecache.h @@ -43,7 +43,8 @@ typedef leveldb_t filecache_t; void filecache_print_stats(void); void filecache_init(char *cache_path, GError **gerr); void filecache_delete(filecache_t *cache, const char *path, bool unlink, GError **gerr); -void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info, bool grace, GError **gerr); +void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info, + bool grace, bool rw, GError **gerr); ssize_t filecache_read(struct fuse_file_info *info, char *buf, size_t size, off_t offset, GError **gerr); ssize_t filecache_write(struct fuse_file_info *info, const char *buf, size_t size, off_t offset, GError **gerr); void filecache_close(struct fuse_file_info *info, GError **gerr); diff --git a/src/fusedav.c b/src/fusedav.c index 3bc9e218..340a6345 100644 --- a/src/fusedav.c +++ b/src/fusedav.c @@ -177,6 +177,7 @@ static void getdir_propfind_callback(__unused void *userdata, const char *path, struct fusedav_config *config = fuse_get_context()->private_data; struct stat_cache_value *existing = NULL; struct stat_cache_value value; + rwp_t rwp = PROPFIND; GError *subgerr1 = NULL ; log_print(LOG_INFO, SECTION_FUSEDAV_PROP, "%s: %s (%lu)", funcname, path, status_code); @@ -309,7 +310,7 @@ static void getdir_propfind_callback(__unused void *userdata, const char *path, bool tmp_session = true; long elapsed_time = 0; - if (!(session = session_request_init(path, NULL, tmp_session, false)) || inject_error(fusedav_error_propfindsession)) { + if (!(session = session_request_init(path, NULL, tmp_session, rwp)) || inject_error(fusedav_error_propfindsession)) { g_set_error(gerr, fusedav_quark(), ENETDOWN, "%s(%s): failed to get request session", funcname, path); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); @@ -1160,6 +1161,7 @@ static void common_unlink(const char *path, bool do_unlink, GError **gerr) { struct fusedav_config *config = fuse_get_context()->private_data; struct stat st; struct stat_cache_value value; + rwp_t rwp = WRITE; GError *gerr2 = NULL; GError *gerr3 = NULL; @@ -1183,7 +1185,7 @@ static void common_unlink(const char *path, bool do_unlink, GError **gerr) { struct curl_slist *slist = NULL; long elapsed_time = 0; - if (!(session = session_request_init(path, NULL, false, true)) || inject_error(fusedav_error_cunlinksession)) { + if (!(session = session_request_init(path, NULL, false, rwp)) || inject_error(fusedav_error_cunlinksession)) { g_set_error(gerr, fusedav_quark(), ENETDOWN, "%s(%s): failed to get request session", funcname, path); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. 
try_release_request_outstanding(); @@ -1271,6 +1273,7 @@ static int dav_rmdir(const char *path) { struct stat st; long response_code = 500; // seed it as bad so we can enter the loop CURLcode res = CURLE_OK; + rwp_t rwp = WRITE; if (use_readonly_mode()) { log_print(LOG_WARNING, SECTION_FUSEDAV_FILE, "dav_rmdir: %s aborted; in readonly mode", path); @@ -1315,7 +1318,7 @@ static int dav_rmdir(const char *path) { struct curl_slist *slist = NULL; long elapsed_time = 0; - if (!(session = session_request_init(fn, NULL, false, true))) { + if (!(session = session_request_init(fn, NULL, false, rwp))) { log_print(LOG_ERR, SECTION_FUSEDAV_DIR, "%s(%s): failed to get session", funcname, path); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); @@ -1372,6 +1375,7 @@ static int dav_mkdir(const char *path, mode_t mode) { GError *gerr = NULL; long response_code = 500; // seed it as bad so we can enter the loop CURLcode res = CURLE_OK; + rwp_t rwp = WRITE; if (use_readonly_mode()) { log_print(LOG_WARNING, SECTION_FUSEDAV_FILE, "dav_mkdir: %s aborted; in readonly mode", path); @@ -1390,7 +1394,7 @@ static int dav_mkdir(const char *path, mode_t mode) { struct curl_slist *slist = NULL; long elapsed_time = 0; - if (!(session = session_request_init(fn, NULL, false, true))) { + if (!(session = session_request_init(fn, NULL, false, rwp))) { log_print(LOG_ERR, SECTION_FUSEDAV_DIR, "%s(%s): failed to get session", funcname, path); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); @@ -1451,6 +1455,7 @@ static int dav_rename(const char *from, const char *to) { struct stat_cache_value *entry = NULL; long response_code = 500; // seed it as bad so we can enter the loop CURLcode res = CURLE_OK; + rwp_t rwp = WRITE; if (use_readonly_mode()) { log_print(LOG_WARNING, SECTION_FUSEDAV_FILE, "dav_rename: %s aborted; in readonly mode", from); @@ -1483,7 +1488,7 @@ static int dav_rename(const char *from, const char *to) { char *escaped_to; long elapsed_time = 0; - if (!(session = session_request_init(from, NULL, false, true))) { + if (!(session = session_request_init(from, NULL, false, rwp))) { log_print(LOG_ERR, SECTION_FUSEDAV_FILE, "%s: failed to get session for %d:%s", funcname, fd, from); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); @@ -1837,13 +1842,24 @@ static int dav_mknod(const char *path, mode_t mode, __unused dev_t rdev) { return 0; } +static bool write_flag(int flags) { + // O_RDWR technically belongs in the list, but since it might be used for files + // which are only read, I leave it out. + // O_CREAT on a file which already exists is a noop unless O_EXCL is also included. + // So, only respond if both are present. 
+ if ((flags & O_WRONLY) || ((flags & O_CREAT) && (flags & O_EXCL)) || (flags & O_TRUNC) || (flags & O_APPEND)) { + return true; + } + return false; +} + static void do_open(const char *path, struct fuse_file_info *info, GError **gerr) { struct fusedav_config *config = fuse_get_context()->private_data; GError *tmpgerr = NULL; assert(info); - filecache_open(config->cache_path, config->cache, path, info, config->grace, &tmpgerr); + filecache_open(config->cache_path, config->cache, path, info, config->grace, write_flag(info->flags), &tmpgerr); if (tmpgerr) { g_propagate_prefixed_error(gerr, tmpgerr, "do_open: "); return; @@ -1854,17 +1870,6 @@ static void do_open(const char *path, struct fuse_file_info *info, GError **gerr return; } -static bool write_flag(int flags) { - // O_RDWR technically belongs in the list, but since it might be used for files - // which are only read, I leave it out. - // O_CREAT on a file which already exists is a noop unless O_EXCL is also included. - // So, only respond if both are present. - if ((flags & O_WRONLY) || ((flags & O_CREAT) && (flags & O_EXCL)) || (flags & O_TRUNC) || (flags & O_APPEND)) { - return true; - } - return false; -} - static int dav_open(const char *path, struct fuse_file_info *info) { struct fusedav_config *config = fuse_get_context()->private_data; GError *gerr = NULL; @@ -1878,11 +1883,6 @@ static int dav_open(const char *path, struct fuse_file_info *info) { BUMP(dav_open); - if (config->grace && use_saint_mode() && ((info->flags & O_TRUNC) || (info->flags & O_APPEND))) { - g_set_error(&gerr, fusedav_quark(), ENETDOWN, "trying to write in saint mode"); - return processed_gerror("dav_open: ", path, &gerr); - } - // There are circumstances where we read a write-only file, so if write-only // is specified, change to read-write. Otherwise, a read on that file will // return an EBADF. @@ -2138,7 +2138,6 @@ static int dav_create(const char *path, mode_t mode, struct fuse_file_info *info BUMP(dav_create); - log_print(LOG_INFO, SECTION_FUSEDAV_FILE, "CALLBACK: dav_create(%s, %04o)", path, mode); info->flags |= O_CREAT | O_TRUNC; diff --git a/src/props.c b/src/props.c index b8b80103..4470cf6c 100644 --- a/src/props.c +++ b/src/props.c @@ -389,12 +389,13 @@ int simple_propfind(const char *path, size_t depth, time_t last_updated, props_r char *header = NULL; char *query_string = NULL; long elapsed_time = 0; + rwp_t rwp = PROPFIND; // Set up the request handle. 
if (last_updated > 0) { asprintf(&query_string, "changes_since=%lu", last_updated); } - session = session_request_init(path, query_string, false, false); + session = session_request_init(path, query_string, false, rwp); if (!session || inject_error(props_error_spropfindsession)) { g_set_error(gerr, props_quark(), ENETDOWN, "%s(%s): failed to get request session", funcname, path); free(query_string); diff --git a/src/session.c b/src/session.c index 0ccae9b1..fe5fb9b8 100644 --- a/src/session.c +++ b/src/session.c @@ -1261,7 +1261,7 @@ static CURL *get_session(bool tmp_session) { return session; } -CURL *session_request_init(const char *path, const char *query_string, bool tmp_session, bool rw) { +CURL *session_request_init(const char *path, const char *query_string, bool tmp_session, rwp_t rwp) { CURL *session; char *full_url = NULL; char *escaped_path; @@ -1271,11 +1271,9 @@ CURL *session_request_init(const char *path, const char *query_string, bool tmp_ // Calls to this function, on detecting this error, set ENETDOWN, which is appropriate if (use_saint_mode()) { log_print(LOG_NOTICE, SECTION_SESSION_DEFAULT, "%s: already in saint mode", funcname); - if (rw) { - stats_counter("saint_write", 1, 1.0); - return NULL; - } - stats_counter("saint_read", 1, 1.0); + if (rwp == WRITE) stats_counter("saint_write", 1, 1.0); + else if (rwp == READ) stats_counter("saint_read", 1, 1.0); + else if (rwp == PROPFIND) stats_counter("saint_propfind", 1, 1.0); return NULL; } diff --git a/src/session.h b/src/session.h index a14a3f73..35a0abf1 100644 --- a/src/session.h +++ b/src/session.h @@ -24,8 +24,10 @@ extern int num_filesystem_server_nodes; +typedef enum { READ, WRITE, PROPFIND } rwp_t; + int session_config_init(char *base, char *ca_cert, char *client_cert, bool grace); -CURL *session_request_init(const char *path, const char *query_string, bool temporary_handle, bool rw); +CURL *session_request_init(const char *path, const char *query_string, bool temporary_handle, rwp_t rwp); void session_config_free(void); bool process_status(const char *fcn_name, CURL *session, const CURLcode res, const long response_code, const long elapsed_time, const int iter, From c75847d7aad2fd934b4b31595c46b07d2d954d06 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Fri, 26 Jul 2019 16:49:33 +0000 Subject: [PATCH 03/21] pass content sha512 ... so that the server can redirect us without needing to read the data. 
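The checksum pass this patch introduces (and that later commits in the series refine, see "derp" and "fix number of bytes passed to g_checksum_update") boils down to: read the cache file in fixed-size chunks, feed each chunk to a GLib GChecksum, and rewind the stream before curl consumes it for the PUT. A minimal self-contained sketch, assuming a seekable FILE*; the helper name and 1024-byte buffer are illustrative, not part of the patch:

    #include <stdio.h>
    #include <glib.h>

    /* Hash an open stream and return a newly allocated hex digest (caller frees),
     * or NULL on a read error. The stream is left at EOF; rewind before reuse. */
    static char *digest_stream(FILE *fp, GChecksumType type) {
        GChecksum *sum = g_checksum_new(type);
        guchar buf[1024];
        size_t n;

        rewind(fp);
        while ((n = fread(buf, 1, sizeof(buf), fp)) > 0)
            g_checksum_update(sum, buf, n);      /* hash only the bytes actually read */

        if (ferror(fp)) {                        /* short read that was not end-of-file */
            g_checksum_free(sum);
            return NULL;
        }
        char *hex = g_strdup(g_checksum_get_string(sum));
        g_checksum_free(sum);
        return hex;
    }

Hashing cannot happen in the same pass as the upload because curl reads the FILE* itself via CURLOPT_READDATA, which is why the diff rewinds the stream a second time after the checksum loop.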
--- src/filecache.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index 2dd1bd1e..d9c53834 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1078,15 +1078,40 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) CURL *session; struct curl_slist *slist = NULL; FILE *fp; + GChecksum *checksum; fp = fdopen(dup(fd), "r"); if (!fp) { g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); - goto finish; + goto ifinish; } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); - goto finish; + goto ifinish; + } + + int num_read, is_eof, is_ferror; + guchar fbytes[1024]; + num_read = is_eof = is_ferror = 0; + checksum = g_checksum_new(G_CHECKSUM_SHA512); + while (is_eof == 0) { + num_read = fread(fbytes, sizeof(fbytes), 1, fp); + if (num_read == 0) { + if ((is_ferror = ferror(fp)) != 0) { + g_set_error(gerr, system_quark(), errno, "%s: fread for checksum error on path %s", funcname, path); + goto ifinish; + } + if ((is_eof = feof(fp)) == 0) { + g_set_error(gerr, system_quark(), errno, "%s: fread for checksum no eof short read on path %s", funcname, path); + goto ifinish; + } + } + g_checksum_update(checksum, fbytes, sizeof(fbytes)); + } + + if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { + g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); + goto ifinish; } // REVIEW: We didn't use to check for sesssion == NULL, so now we @@ -1096,7 +1121,7 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) g_set_error(gerr, curl_quark(), E_FC_CURLERR, "%s: Failed session_request_init on PUT", funcname); // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); - goto finish; + goto ifinish; } curl_easy_setopt(session, CURLOPT_CUSTOMREQUEST, "PUT"); @@ -1104,6 +1129,11 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) curl_easy_setopt(session, CURLOPT_INFILESIZE, st.st_size); curl_easy_setopt(session, CURLOPT_READDATA, (void *) fp); + char* sha512_header; + asprintf(&sha512_header, "If-None-Match: %s", g_checksum_get_string); + slist = curl_slist_append(slist, sha512_header); + free(sha512_header); + slist = enhanced_logging(slist, LOG_DYNAMIC, SECTION_FILECACHE_COMM, "put_return_tag: %s", path); if (slist) curl_easy_setopt(session, CURLOPT_HTTPHEADER, slist); @@ -1114,15 +1144,21 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) timed_curl_easy_perform(session, &res, &response_code, &elapsed_time); - fclose(fp); - if (slist) curl_slist_free_all(slist); bool non_retriable_error = process_status("put", session, res, response_code, elapsed_time, idx, path, false); // Some errors should not be retried. 
(Non-errors will fail the // for loop test and fall through naturally) if (non_retriable_error) break; - } + + ifinish: + // close only if we successfully opened + if (fp) { + fclose(fp); + } + g_checksum_free(checksum); + goto finish; + } // end for loop if ((res != CURLE_OK || response_code >= 500) || inject_error(filecache_error_etagcurl1)) { trigger_saint_event(CLUSTER_FAILURE, "put"); @@ -1238,7 +1274,6 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) log_print(LOG_DEBUG, SECTION_FILECACHE_COMM, "PUT returns etag: %s", etag); finish: - log_print(LOG_DEBUG, SECTION_FILECACHE_FLOCK, "%s: releasing exclusive file lock on fd %d", funcname, fd); if (flock(fd, LOCK_UN) || inject_error(filecache_error_etagflock2)) { g_set_error(gerr, system_quark(), errno, "%s: error releasing exclusive file lock", funcname); From 217f17b6e4f55074c63a744445a2385c477a0563 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 12:35:18 +0000 Subject: [PATCH 04/21] derp --- src/filecache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filecache.c b/src/filecache.c index d9c53834..86aff6bf 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1130,7 +1130,7 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) curl_easy_setopt(session, CURLOPT_READDATA, (void *) fp); char* sha512_header; - asprintf(&sha512_header, "If-None-Match: %s", g_checksum_get_string); + asprintf(&sha512_header, "If-None-Match: %s", g_checksum_get_string(checksum)); slist = curl_slist_append(slist, sha512_header); free(sha512_header); From 8bb57ab80654251ef7d9bab3d936757dcce8be37 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 12:59:14 +0000 Subject: [PATCH 05/21] md5 as etag, sha512 in other header gcs uses md5 as the etag, and we store the etag from gcs, not the one from the 307 response from Valhalla. 
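With both digests in hand, the headers split the way the commit message describes: the SHA-512 travels in a Valhalla-specific header for the redirect decision, while the MD5 goes in If-None-Match so it lines up with the MD5-style etag GCS returns. Roughly, restating the hunks below (asprintf error handling omitted):

    char *sha512_header = NULL;
    char *md5_header = NULL;

    asprintf(&sha512_header, "X-Valhalla-SHA512: %s", g_checksum_get_string(sha512_checksum));
    asprintf(&md5_header, "If-None-Match: %s", g_checksum_get_string(md5_checksum));

    slist = curl_slist_append(slist, sha512_header);   /* redirect hint for the server */
    slist = curl_slist_append(slist, md5_header);      /* comparable to the GCS etag */

    free(sha512_header);
    free(md5_header);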
--- src/filecache.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index 86aff6bf..4cf4415b 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1078,7 +1078,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) CURL *session; struct curl_slist *slist = NULL; FILE *fp; - GChecksum *checksum; + GChecksum *sha512_checksum; + GChecksum *md5_checksum; fp = fdopen(dup(fd), "r"); if (!fp) { @@ -1093,7 +1094,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) int num_read, is_eof, is_ferror; guchar fbytes[1024]; num_read = is_eof = is_ferror = 0; - checksum = g_checksum_new(G_CHECKSUM_SHA512); + sha512_checksum = g_checksum_new(G_CHECKSUM_SHA512); + md5_checksum = g_checksum_new(G_CHECKSUM_MD5); while (is_eof == 0) { num_read = fread(fbytes, sizeof(fbytes), 1, fp); if (num_read == 0) { @@ -1106,7 +1108,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) goto ifinish; } } - g_checksum_update(checksum, fbytes, sizeof(fbytes)); + g_checksum_update(sha512_checksum, fbytes, sizeof(fbytes)); + g_checksum_update(md5_checksum, fbytes, sizeof(fbytes)); } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { @@ -1130,10 +1133,15 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) curl_easy_setopt(session, CURLOPT_READDATA, (void *) fp); char* sha512_header; - asprintf(&sha512_header, "If-None-Match: %s", g_checksum_get_string(checksum)); + asprintf(&sha512_header, "X-Valhalla-SHA512: %s", g_checksum_get_string(sha512_checksum)); slist = curl_slist_append(slist, sha512_header); free(sha512_header); + char* md5_header; + asprintf(&md5_header, "If-None-Match: %s", g_checksum_get_string(md5_checksum)); + slist = curl_slist_append(slist, md5_header); + free(md5_header); + slist = enhanced_logging(slist, LOG_DYNAMIC, SECTION_FILECACHE_COMM, "put_return_tag: %s", path); if (slist) curl_easy_setopt(session, CURLOPT_HTTPHEADER, slist); From 7e1be3da7764c1bcc68a29d0b640a6c870d742f5 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 13:07:01 +0000 Subject: [PATCH 06/21] fixup --- src/filecache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/filecache.c b/src/filecache.c index 4cf4415b..5f508528 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1164,7 +1164,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) if (fp) { fclose(fp); } - g_checksum_free(checksum); + g_checksum_free(sha512_checksum); + g_checksum_free(md5_checksum); goto finish; } // end for loop From 732ced415a0dba04f5118d7e771c6972106d4f80 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 14:20:15 +0000 Subject: [PATCH 07/21] clean up error handling --- src/filecache.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index 5f508528..9c33411d 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1084,11 +1084,11 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) fp = fdopen(dup(fd), "r"); if (!fp) { g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); - goto ifinish; + break; } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); - goto ifinish; + break; } int num_read, is_eof, is_ferror; @@ -1101,11 +1101,11 @@ 
static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) if (num_read == 0) { if ((is_ferror = ferror(fp)) != 0) { g_set_error(gerr, system_quark(), errno, "%s: fread for checksum error on path %s", funcname, path); - goto ifinish; + break; } if ((is_eof = feof(fp)) == 0) { g_set_error(gerr, system_quark(), errno, "%s: fread for checksum no eof short read on path %s", funcname, path); - goto ifinish; + break; } } g_checksum_update(sha512_checksum, fbytes, sizeof(fbytes)); @@ -1114,17 +1114,14 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); - goto ifinish; + break; } - // REVIEW: We didn't use to check for sesssion == NULL, so now we - // also call try_release_request_outstanding. Is this OK? session = session_request_init(path, NULL, false, rwp); if (!session || inject_error(filecache_error_freshsession)) { g_set_error(gerr, curl_quark(), E_FC_CURLERR, "%s: Failed session_request_init on PUT", funcname); - // TODO(kibra): Manually cleaning up this lock sucks. We should make sure this happens in a better way. try_release_request_outstanding(); - goto ifinish; + break; } curl_easy_setopt(session, CURLOPT_CUSTOMREQUEST, "PUT"); @@ -1159,15 +1156,17 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) // for loop test and fall through naturally) if (non_retriable_error) break; - ifinish: - // close only if we successfully opened - if (fp) { - fclose(fp); - } - g_checksum_free(sha512_checksum); - g_checksum_free(md5_checksum); - goto finish; } // end for loop + // close only if we successfully opened + if (fp) { + fclose(fp); + } + g_checksum_free(sha512_checksum); + g_checksum_free(md5_checksum); + + if (gerr) { + goto finish; + } if ((res != CURLE_OK || response_code >= 500) || inject_error(filecache_error_etagcurl1)) { trigger_saint_event(CLUSTER_FAILURE, "put"); From 38caa6a717d03bef6b214baddd165c701c64f114 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 14:27:20 +0000 Subject: [PATCH 08/21] push fp up in scope --- src/filecache.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index 9c33411d..dc8a4c56 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1070,6 +1070,17 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) log_print(LOG_DEBUG, SECTION_FILECACHE_COMM, "%s: file size %d", funcname, st.st_size); + FILE *fp; + fp = fdopen(dup(fd), "r"); + if (!fp) { + g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); + break; + } + if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { + g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); + break; + } + // If we're in saint mode, skip the PUT altogether for (int idx = 0; idx < num_filesystem_server_nodes && (res != CURLE_OK || response_code >= 500); @@ -1077,22 +1088,11 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) long elapsed_time = 0; CURL *session; struct curl_slist *slist = NULL; - FILE *fp; GChecksum *sha512_checksum; GChecksum *md5_checksum; - - fp = fdopen(dup(fd), "r"); - if (!fp) { - g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); - break; - } - if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { - g_set_error(gerr, 
system_quark(), errno, "%s: fseek error on path %s", funcname, path); - break; - } - int num_read, is_eof, is_ferror; guchar fbytes[1024]; + num_read = is_eof = is_ferror = 0; sha512_checksum = g_checksum_new(G_CHECKSUM_SHA512); md5_checksum = g_checksum_new(G_CHECKSUM_MD5); From 89369d50e990c45dba9f6c49ab005e202d47ad01 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 14:36:34 +0000 Subject: [PATCH 09/21] fixup --- src/filecache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index dc8a4c56..856eab7a 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1070,15 +1070,17 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) log_print(LOG_DEBUG, SECTION_FILECACHE_COMM, "%s: file size %d", funcname, st.st_size); + GChecksum *sha512_checksum; + GChecksum *md5_checksum; FILE *fp; fp = fdopen(dup(fd), "r"); if (!fp) { g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path); - break; + goto finish; } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { g_set_error(gerr, system_quark(), errno, "%s: fseek error on path %s", funcname, path); - break; + goto finish; } // If we're in saint mode, skip the PUT altogether @@ -1088,8 +1090,6 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) long elapsed_time = 0; CURL *session; struct curl_slist *slist = NULL; - GChecksum *sha512_checksum; - GChecksum *md5_checksum; int num_read, is_eof, is_ferror; guchar fbytes[1024]; From 429698f138f1ad6649b58bea4c1fcd8f320c0787 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 14:50:41 +0000 Subject: [PATCH 10/21] nil Content-Type to appease the GCS signed url on PUT --- src/filecache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/filecache.c b/src/filecache.c index 856eab7a..baf23f01 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1139,6 +1139,7 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) slist = curl_slist_append(slist, md5_header); free(md5_header); + slist = curl_slist_append(slist, "Content-Type:"); slist = enhanced_logging(slist, LOG_DYNAMIC, SECTION_FILECACHE_COMM, "put_return_tag: %s", path); if (slist) curl_easy_setopt(session, CURLOPT_HTTPHEADER, slist); From 445c04687246e24d8d618ecfc6375bf7eb5d5f5e Mon Sep 17 00:00:00 2001 From: Will Milton Date: Sun, 28 Jul 2019 18:42:17 +0000 Subject: [PATCH 11/21] fix number of bytes passed to g_checksum_update --- src/filecache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/filecache.c b/src/filecache.c index baf23f01..39d2894a 100644 --- a/src/filecache.c +++ b/src/filecache.c @@ -1097,7 +1097,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) sha512_checksum = g_checksum_new(G_CHECKSUM_SHA512); md5_checksum = g_checksum_new(G_CHECKSUM_MD5); while (is_eof == 0) { - num_read = fread(fbytes, sizeof(fbytes), 1, fp); + // not sure of the performance implications of reading one byte out n times. 
+ num_read = fread(fbytes, 1, sizeof(fbytes), fp); if (num_read == 0) { if ((is_ferror = ferror(fp)) != 0) { g_set_error(gerr, system_quark(), errno, "%s: fread for checksum error on path %s", funcname, path); @@ -1108,8 +1109,8 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr) break; } } - g_checksum_update(sha512_checksum, fbytes, sizeof(fbytes)); - g_checksum_update(md5_checksum, fbytes, sizeof(fbytes)); + g_checksum_update(sha512_checksum, fbytes, num_read); + g_checksum_update(md5_checksum, fbytes, num_read); } if (fseek(fp, 0L, SEEK_SET) == (off_t)-1) { From 4420e4eb9a0128bad25228a1cb870f972c318629 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Thu, 1 Aug 2019 15:46:20 +0000 Subject: [PATCH 12/21] build docker images So we can run them in kube. --- .circleci/config.yml | 38 +++++++++++++++++++++++++++++++------- Dockerfile | 6 ++++++ scripts/docker-outer.sh | 19 +++++++++++++++++-- 3 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 Dockerfile diff --git a/.circleci/config.yml b/.circleci/config.yml index 4b142b59..7af29ae3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,9 +20,8 @@ executors: - image: circleci/ruby:2.4 jobs: - build-test: + build-test-22: executor: python-executor - parallelism: 1 steps: - checkout - setup_remote_docker @@ -39,8 +38,30 @@ jobs: /usr/local/bin/cpplint --filter=-whitespace/line_length src/* true - run: - name: Build the image - command: bash scripts/docker-outer.sh + name: Build the packages + command: BUILD_VERSIONS=22 bash scripts/docker-outer.sh + - *persist_to_workspace + + build-test-28: + executor: python-executor + steps: + - checkout + - setup_remote_docker + - run: + name: Install cpplint + command: sudo pip install cpplint + - run: + name: Set up version + command: bash scripts/version.sh + - run: + name: Call cpplint; expected to fail, so set things up to pass + command: | + set +eo pipefail + /usr/local/bin/cpplint --filter=-whitespace/line_length src/* + true + - run: + name: Build the packages + command: BUILD_VERSIONS=28 bash scripts/docker-outer.sh - *persist_to_workspace deploy-dev: @@ -71,10 +92,12 @@ workflows: version: 2 build-test-deploy: jobs: - - build-test + - build-test-22 + - build-test-28 - deploy-dev: requires: - - build-test + - build-test-22 + - build-test-28 filters: branches: only: @@ -83,7 +106,8 @@ workflows: - stage - deploy-prod: requires: - - build-test + - build-test-22 + - build-test-28 filters: branches: only: diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..e7e896ca --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +ARG VERSION + +FROM quay.io/getpantheon/fedora:${VERSION} + +COPY pkg/fusedav /opt/fusedav +RUN dnf install -y /opt/fusedav/*.rpm && rm -r /opt/fusedav diff --git a/scripts/docker-outer.sh b/scripts/docker-outer.sh index 80342b7f..aacba7e5 100755 --- a/scripts/docker-outer.sh +++ b/scripts/docker-outer.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash set -e bin="$(cd -P -- "$(dirname -- "$0")" && pwd -P)" docker=$(which docker) @@ -21,6 +21,8 @@ $docker volume create fusedav_vol $docker run --name cp-vol -v fusedav_vol:/fusedav busybox true $docker cp $bin/../. cp-vol:/fusedav/ +docker login -p "$QUAY_PASSWD" -u "$QUAY_USER" quay.io + # epoch to use for -revision epoch=$(date +%s) @@ -48,8 +50,21 @@ EOL echo "Running: $docker_cmd" $docker_cmd + echo "copying the rpm from the container..." 
+ mkdir -p "$bin/../pkg" + $docker cp "cp-vol:/fusedav/pkg/${ver}/fusedav" "$bin/../pkg/fusedav" + + docker_build="$docker build -t quay.io/getpantheon/fusedav:f${ver}-${CIRCLE_BUILD_NUM} --build-arg VERSION=${ver} ." + + echo "Running: $docker_build" + $docker_build + + docker_push="$docker push quay.io/getpantheon/fusedav:f${ver}-${CIRCLE_BUILD_NUM}" + + echo "Running: $docker_push" + $docker_push + done -$docker cp cp-vol:/fusedav/pkg $bin/../pkg/ $docker rm cp-vol $docker volume rm fusedav_vol From b273528af2da2428d43330090c08b18673c84134 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Fri, 2 Aug 2019 14:46:40 +0000 Subject: [PATCH 13/21] add jemalloc dependency --- scripts/docker-inner.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/docker-inner.sh b/scripts/docker-inner.sh index 3a9d147c..9e14c7f9 100755 --- a/scripts/docker-inner.sh +++ b/scripts/docker-inner.sh @@ -79,6 +79,7 @@ fpm -s dir -t rpm \ --depends uriparser \ --depends fuse-libs \ --depends leveldb \ + --depends jemalloc \ --log=debug \ $install_prefix \ /usr/sbin/mount.$name From cd68df43d7fd6d3d04e01ba054b3bdebed3813fa Mon Sep 17 00:00:00 2001 From: Will Milton Date: Fri, 2 Aug 2019 13:39:02 +0000 Subject: [PATCH 14/21] try adding stdout logging --- src/fusedav_config.c | 8 ++++++-- src/fusedav_config.h | 1 + src/log.c | 37 ++++++++++++++++++++++++++++++------- src/log.h | 8 +++++++- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/fusedav_config.c b/src/fusedav_config.c index 2b925547..5e8ea1f5 100644 --- a/src/fusedav_config.c +++ b/src/fusedav_config.c @@ -78,7 +78,7 @@ static int fusedav_opt_proc(void *data, const char *arg, int key, struct fuse_ar break; case KEY_IGNORE: - return 0; + return 0; case KEY_HELP: fprintf(stderr, @@ -130,6 +130,7 @@ static void print_config(struct fusedav_config *config) { log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "log_level %d", config->log_level); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "log_level_by_section %s", config->log_level_by_section); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "log_prefix %s", config->log_prefix); + log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "log_destination %s", config->log_destination); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "max_file_size %d", config->max_file_size); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "statsd_host %s", config->statsd_host); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "statsd_port %s", config->statsd_port); @@ -156,6 +157,7 @@ run_as_gid=6f7a106722f74cc7bd96d4d06785ed78 log_level=5 log_level_by_section=0 log_prefix=6f7a106722f74cc7bd96d4d06785ed78 +log_destination=journal max_file_size=256 statsd_host=127.0.0.1 statsd_port=8126 @@ -198,6 +200,7 @@ static void parse_configs(struct fusedav_config *config, GError **gerr) { keytuple(fusedav, log_level, INT), keytuple(fusedav, log_level_by_section, STRING), keytuple(fusedav, log_prefix, STRING), + keytuple(fusedav, log_destination, STRING), keytuple(fusedav, max_file_size, INT), keytuple(fusedav, statsd_host, STRING), keytuple(fusedav, statsd_port, STRING), @@ -290,6 +293,7 @@ void configure_fusedav(struct fusedav_config *config, struct fuse_args *args, ch config->nodaemon = false; config->max_file_size = 256; // 256M config->log_level = 5; // default log_level: LOG_NOTICE + config->log_destination = JOURNAL; asprintf(&config->statsd_host, "%s", "127.0.0.1"); asprintf(&config->statsd_port, "%s", "8126"); @@ -312,7 +316,7 @@ void configure_fusedav(struct fusedav_config *config, struct fuse_args *args, ch asprintf(&user_agent, 
"FuseDAV/%s %s", PACKAGE_VERSION, config->log_prefix); - log_init(config->log_level, config->log_level_by_section, config->log_prefix); + log_init(config->log_level, config->log_level_by_section, config->log_prefix, config->log_destination); log_print(LOG_DEBUG, SECTION_CONFIG_DEFAULT, "log_level: %d.", config->log_level); if (stats_init(config->statsd_host, config->statsd_port) < 0) { diff --git a/src/fusedav_config.h b/src/fusedav_config.h index d32b3908..fc66d25c 100644 --- a/src/fusedav_config.h +++ b/src/fusedav_config.h @@ -52,6 +52,7 @@ struct fusedav_config { int log_level; char *log_level_by_section; char *log_prefix; + char *log_destination; int max_file_size; char *statsd_host; char *statsd_port; diff --git a/src/log.c b/src/log.c index 6dd6d3b4..9a5324a7 100644 --- a/src/log.c +++ b/src/log.c @@ -53,10 +53,19 @@ __thread unsigned int LOG_DYNAMIC = LOG_INFO; // max size for strings in log_key_value array #define KVITEM_SIZE 64 +static const char *log_template = "{\"MESSAGE\": \"%s%s\", " + "\"PRIORITY\": %d, " + "\"USER_AGENT\": \"%s\", " + "\"SITE\": \"%s\", " + "\"ENVIRONMENT\": \"%s\", " + "\"HOST_ADDRESS\": \"%s\", " + "\"TID\": \"%lu\", " + "\"PACKAGE_VERSION\": \"%s\"}\n"; + static unsigned int global_log_level = 5; static unsigned int section_log_levels[SECTIONS] = {0}; static const char *log_key_value[KVITEMS]; - +static enum log_destination log_destination = JOURNAL; static const char *errlevel[] = {"EMERG: ", "ALERT: ", "CRIT: ", "ERR: ", "WARN: ", "NOTICE: ", "INFO: ", "DEBUG: "}; // From the base url get the site id and site env @@ -85,7 +94,7 @@ static void initialize_site(void) { } /* The log_prefix comes from fusedav.conf; the base_url from curl and fuse. */ -void log_init(unsigned int log_level, const char *log_level_by_section, const char *user_agent_abbrev) { +void log_init(unsigned int log_level, const char *log_level_by_section, const char *user_agent_abbrev, const char *destination) { unsigned int vlen; @@ -100,6 +109,10 @@ void log_init(unsigned int log_level, const char *log_level_by_section, const ch log_key_value[USER_AGENT_ABBREV] = "(null)"; } + if ((destination != NULL) && strncmp(destination, "stdout", sizeof("stdout")) == 0) { + log_destination = STDOUT; + } + initialize_site(); if (log_level_by_section == NULL) return; @@ -168,9 +181,20 @@ int logging(unsigned int log_level, unsigned int section) { } static int print_it(const char const *formatwithtid, const char const *msg, int log_level) { - int ret; - // fusedav-server standardizing on names BINDING, SITE, and ENVIRONMENT - ret = sd_journal_send("MESSAGE=%s%s", formatwithtid, msg, + if (log_destination == STDOUT) { + printf(log_template, + formatwithtid, msg, + log_level, + get_user_agent(), + log_key_value[SITE], + log_key_value[ENVIRONMENT], + log_key_value[HOST_ADDRESS], + syscall(SYS_gettid), + PACKAGE_VERSION, + NULL); + return 0; + } + return sd_journal_send("MESSAGE=%s%s", formatwithtid, msg, "PRIORITY=%d", log_level, "USER_AGENT=%s", get_user_agent(), "SITE=%s", log_key_value[SITE], @@ -179,7 +203,6 @@ static int print_it(const char const *formatwithtid, const char const *msg, int "TID=%lu", syscall(SYS_gettid), "PACKAGE_VERSION=%s", PACKAGE_VERSION, NULL); - return ret; } #define max_msg_sz 2048 @@ -196,7 +219,7 @@ int log_print(unsigned int log_level, unsigned int section, const char *format, assert(formatwithlevel); // print the intended message - ret = print_it(formatwithlevel, msg, log_level); + print_it(formatwithlevel, msg, log_level); // Check and see if we're no longer doing 
dynamic logging. If so, it will take effect after this call. Then print a message if (turning_off_dynamic_logging()) { diff --git a/src/log.h b/src/log.h index 971436fe..8c533867 100644 --- a/src/log.h +++ b/src/log.h @@ -22,7 +22,13 @@ extern __thread unsigned int LOG_DYNAMIC; -void log_init(unsigned int log_level, const char *log_level_by_section, const char *user_agent); +enum log_destination { + JOURNAL, + STDOUT +}; + + +void log_init(unsigned int log_level, const char *log_level_by_section, const char *user_agent, const char *destination); int log_print(unsigned int log_level, unsigned int section, const char *format, ...); int logging(unsigned int log_level, unsigned int section); void set_dynamic_logging(void); From 0ce9b7683da900575461abf890786981a86e3b00 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Tue, 6 Aug 2019 13:08:35 +0000 Subject: [PATCH 15/21] add dev tools to docker image --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e7e896ca..aced9af1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,6 @@ ARG VERSION FROM quay.io/getpantheon/fedora:${VERSION} COPY pkg/fusedav /opt/fusedav -RUN dnf install -y /opt/fusedav/*.rpm && rm -r /opt/fusedav +RUN dnf install -y /opt/fusedav/*.rpm make perf valgrind gdb \ + && rm -r /opt/fusedav \ + && dnf clean all From 30854e21599d8c8655490ac997534f0240984b94 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Mon, 5 Aug 2019 13:26:15 +0000 Subject: [PATCH 16/21] build "devel" packages --- .circleci/config.yml | 60 +++++++++++++++++++++++++++++++++++++++++ scripts/docker-outer.sh | 10 ++++--- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7af29ae3..2600b274 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -64,6 +64,50 @@ jobs: command: BUILD_VERSIONS=28 bash scripts/docker-outer.sh - *persist_to_workspace + build-test-22-devel: + executor: python-executor + steps: + - checkout + - setup_remote_docker + - run: + name: Install cpplint + command: sudo pip install cpplint + - run: + name: Set up version + command: bash scripts/version.sh + - run: + name: Call cpplint; expected to fail, so set things up to pass + command: | + set +eo pipefail + /usr/local/bin/cpplint --filter=-whitespace/line_length src/* + true + - run: + name: Build the packages + command: BUILD_VERSIONS=22 DEVEL=-devel bash scripts/docker-outer.sh + - *persist_to_workspace + + build-test-28-devel: + executor: python-executor + steps: + - checkout + - setup_remote_docker + - run: + name: Install cpplint + command: sudo pip install cpplint + - run: + name: Set up version + command: bash scripts/version.sh + - run: + name: Call cpplint; expected to fail, so set things up to pass + command: | + set +eo pipefail + /usr/local/bin/cpplint --filter=-whitespace/line_length src/* + true + - run: + name: Build the packages + command: BUILD_VERSIONS=28 DEVEL=-devel bash scripts/docker-outer.sh + - *persist_to_workspace + deploy-dev: executor: ruby-executor steps: @@ -94,6 +138,22 @@ workflows: jobs: - build-test-22 - build-test-28 + - build-test-22-devel: + filters: + branches: + ignore: + - dev + - yolo + - stage + - master + - build-test-28-devel: + filters: + branches: + ignore: + - dev + - yolo + - stage + - master - deploy-dev: requires: - build-test-22 diff --git a/scripts/docker-outer.sh b/scripts/docker-outer.sh index aacba7e5..56823f16 100755 --- a/scripts/docker-outer.sh +++ b/scripts/docker-outer.sh @@ -13,6 +13,8 @@ RUN_ARGS="--rm" 
# set a default build -> 0 for when it doesn't exist CIRCLE_BUILD_NUM=${CIRCLE_BUILD_NUM:-0} +DEVEL=${DEVEL:-} + # location to mount the source in the container inner_mount="/fusedav" @@ -33,7 +35,7 @@ for ver in $BUILD_VERSIONS; do $docker pull $build_image channel=$(tr -d "\n\r" < $bin/../CHANNEL) - exec_cmd="$inner_mount/scripts/docker-inner.sh $channel $inner_mount/pkg $CIRCLE_BUILD_NUM $epoch" + exec_cmd="$inner_mount/scripts/docker-inner.sh $channel$DEVEL $inner_mount/pkg $CIRCLE_BUILD_NUM $epoch" if [ -n "$BUILD_DEBUG" ] ; then RUN_ARGS="$RUN_ARGS -ti " exec_cmd="/bin/bash" @@ -53,13 +55,13 @@ EOL echo "copying the rpm from the container..." mkdir -p "$bin/../pkg" $docker cp "cp-vol:/fusedav/pkg/${ver}/fusedav" "$bin/../pkg/fusedav" - - docker_build="$docker build -t quay.io/getpantheon/fusedav:f${ver}-${CIRCLE_BUILD_NUM} --build-arg VERSION=${ver} ." + docker_tag="quay.io/getpantheon/fusedav:f${ver}-${CIRCLE_BUILD_NUM}${DEVEL}" + docker_build="$docker build -t $docker_tag --build-arg VERSION=${ver} ." echo "Running: $docker_build" $docker_build - docker_push="$docker push quay.io/getpantheon/fusedav:f${ver}-${CIRCLE_BUILD_NUM}" + docker_push="$docker push $docker_tag" echo "Running: $docker_push" $docker_push From 5fcc52d9452c5d32b12dfdebcd1846fb76470860 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Tue, 6 Aug 2019 13:33:36 +0000 Subject: [PATCH 17/21] add iozone and sources and tests to devel builds --- scripts/docker-inner.sh | 31 +++++++++++++++++++++++++++++-- sha256sum | 1 + 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 sha256sum diff --git a/scripts/docker-inner.sh b/scripts/docker-inner.sh index 9e14c7f9..78296ea4 100755 --- a/scripts/docker-inner.sh +++ b/scripts/docker-inner.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # set -ex @@ -14,6 +14,11 @@ rpm_dir=$2 build=$3 epoch=$4 +devel= +if [[ "$fusedav_channel" == *-devel ]]; then + devel=true +fi + fedora_release=$(rpm -q --queryformat '%{VERSION}\n' fedora-release) GITSHA=$(git log -1 --format="%h") name="fusedav-$fusedav_channel" @@ -64,10 +69,31 @@ if [ "1" != "$?" 
] ; then fi set -e +# pack in sources and tests into the rpm +if [[ -n $devel ]]; then + cp -r -t $install_prefix src tests + + iozone_version=iozone3_487 + curl http://www.iozone.org/src/current/${iozone_version}.tar > ${iozone_version}.tar + sha256sum -c sha256sum + + tar xf ${iozone_version}.tar + + pushd 2>&1 ${iozone_version}/src/current > /dev/null + make linux-AMD64 + mv iozone $install_prefix/iozone + popd 2>&1 > /dev/null +fi + mv /usr/local/bin/fusedav $install_prefix/$name cp $bin/exec_wrapper/mount.fusedav_chan /usr/sbin/mount.$name chmod 755 /usr/sbin/mount.$name +DEP_GCC= +if [[ -n $devel ]]; then + DEP_GCC="--depends gcc" +fi + fpm -s dir -t rpm \ --name "${name}" \ --version "${version}" \ @@ -76,10 +102,11 @@ fpm -s dir -t rpm \ --url "${url}" \ --vendor "${vendor}" \ --description "${description}" \ - --depends uriparser \ + --depends uriparser \ --depends fuse-libs \ --depends leveldb \ --depends jemalloc \ + ${DEP_GCC} \ --log=debug \ $install_prefix \ /usr/sbin/mount.$name diff --git a/sha256sum b/sha256sum new file mode 100644 index 00000000..95e4583e --- /dev/null +++ b/sha256sum @@ -0,0 +1 @@ +2c488a7ccddd624fd557af16e71442c367b131d6178e1b4023bbd532bacdda59 iozone3_487.tar From 6deaa470e8733db0f5565782d8d3b6618fba1bed Mon Sep 17 00:00:00 2001 From: Will Milton Date: Wed, 7 Aug 2019 16:05:13 +0000 Subject: [PATCH 18/21] trigger integration in fusedav-kube --- .circleci/config.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2600b274..c1460377 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -69,6 +69,9 @@ jobs: steps: - checkout - setup_remote_docker + - add_ssh_keys: + fingerprints: + - "17:da:1d:21:58:46:74:fc:38:4a:62:2a:32:79:1d:ab" - run: name: Install cpplint command: sudo pip install cpplint @@ -84,6 +87,13 @@ jobs: - run: name: Build the packages command: BUILD_VERSIONS=22 DEVEL=-devel bash scripts/docker-outer.sh + - run: + name: trigger integration + command: | + git clone git@github.com/pantheon-systems/fusedav-kube + cd fusedav-kube + git tag f22-${CIRCLE_BUILD_NUM}-devel + git push origin f22-${CIRCLE_BUILD_NUM}-devel - *persist_to_workspace build-test-28-devel: @@ -91,6 +101,9 @@ jobs: steps: - checkout - setup_remote_docker + - add_ssh_keys: + fingerprints: + - "17:da:1d:21:58:46:74:fc:38:4a:62:2a:32:79:1d:ab" - run: name: Install cpplint command: sudo pip install cpplint @@ -106,6 +119,13 @@ jobs: - run: name: Build the packages command: BUILD_VERSIONS=28 DEVEL=-devel bash scripts/docker-outer.sh + - run: + name: trigger integration + command: | + git clone git@github.com/pantheon-systems/fusedav-kube + cd fusedav-kube + git tag f28-${CIRCLE_BUILD_NUM}-devel + git push origin f28-${CIRCLE_BUILD_NUM}-devel - *persist_to_workspace deploy-dev: From 2d2c3fce311549719f4f3762680171ea70483076 Mon Sep 17 00:00:00 2001 From: Will Milton Date: Wed, 7 Aug 2019 16:16:58 +0000 Subject: [PATCH 19/21] try some git config --- .circleci/config.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c1460377..baf69227 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -87,10 +87,15 @@ jobs: - run: name: Build the packages command: BUILD_VERSIONS=22 DEVEL=-devel bash scripts/docker-outer.sh + - run: + name: git config + command: | + git config --global user.email "$CIRCLE_USERNAME@users.noreply.github.com" + git config --global user.name "$CIRCLE_USERNAME" - run: name: trigger integration 
command: | - git clone git@github.com/pantheon-systems/fusedav-kube + git clone git@github.com:pantheon-systems/fusedav-kube cd fusedav-kube git tag f22-${CIRCLE_BUILD_NUM}-devel git push origin f22-${CIRCLE_BUILD_NUM}-devel @@ -119,10 +124,15 @@ jobs: - run: name: Build the packages command: BUILD_VERSIONS=28 DEVEL=-devel bash scripts/docker-outer.sh + - run: + name: git config + command: | + git config --global user.email "$CIRCLE_USERNAME@users.noreply.github.com" + git config --global user.name "$CIRCLE_USERNAME" - run: name: trigger integration command: | - git clone git@github.com/pantheon-systems/fusedav-kube + git clone git@github.com:pantheon-systems/fusedav-kube cd fusedav-kube git tag f28-${CIRCLE_BUILD_NUM}-devel git push origin f28-${CIRCLE_BUILD_NUM}-devel From d3aa10fa7df1a0ea3dd69453b79dafc6acb9ef7a Mon Sep 17 00:00:00 2001 From: Will Milton Date: Wed, 31 Jul 2019 11:39:40 +0000 Subject: [PATCH 20/21] propfind invalidates filecache Before this change, we only serve from the file cache if the content was updated by a call to the server in the last 2 seconds, or the thread is in saint mode. The reason is that we do a conditional get using the If-None-Match header and the Etag from the last response for that file path, hoping to get a relatively fast 304 most of the time. However, querying total duration of requests broken down by cluster, method, and status shows that our 304 GETs are using just as much time in process as normal GETs and PUTs due to sheer volume. This change invalidates the file cache for a path when a PROPFIND shows that the path has been updated in Valhalla. The mechanism for invalidation is to set the timestamp in `filecache_pdata->last_server_update` to 1 for the path in question, and to serve all paths from file cache if the entry isn't invalidated. This arrangement affords serving invalidated content while in saint mode, and aggressive removal of known invalid content by the file cache cleanup mechanism. An optimization we could consider is to use the etag to locate the file cache content, in order to provide deduplication and potentially better cache coherency for the case where the same file content moves between paths. We would need to add the etag to the propfind data, parse it out, pass it to the callback, and use that to evaluate the freshness of the file cache data, and also change the logic for naming a cache file to rename the file after its hash is known, but before publishing file cache data to leveldb. Since the etag and the filename are both properties of the file cache data, we would need to either change the format of the stat cache data, add another entry type, or add a concept of "unfilled" file cache data. 
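The behavioural core of the patch is the reworked freshness test in get_fresh_fd(): instead of trusting the cache only for a couple of seconds after the last server update, every cached entry is trusted unless a PROPFIND has pushed its last_server_update down to the sentinel value. A minimal sketch of that predicate, assuming the fields shown in the diff (the helper itself is illustrative):

    #include <stdbool.h>
    #include <fcntl.h>
    #include <time.h>

    #define FILECACHE_INVALIDATED 1   /* sentinel written by filecache_invalidate() */

    /* True if the cached copy may be served without going back to the server. */
    static bool serve_from_cache(time_t last_server_update, int open_flags, bool saint_mode) {
        return (open_flags & O_TRUNC)                      /* content is about to be discarded anyway */
            || saint_mode                                  /* server unreachable; use what we have */
            || last_server_update == 0                     /* treated as always-fresh by the existing code */
            || last_server_update > FILECACHE_INVALIDATED; /* not invalidated by a PROPFIND */
    }

Note that filecache_invalidate() only rewrites the timestamp and leaves the etag in place, so a later conditional GET can still match if the content was changed and then changed back, exactly as the comment in the new function says.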
---
 src/filecache.c | 32 +++++++++++++++++++++++++++-----
 src/filecache.h |  4 +++-
 src/fusedav.c   |  6 +++++-
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/filecache.c b/src/filecache.c
index 39d2894a..4e711be5 100644
--- a/src/filecache.c
+++ b/src/filecache.c
@@ -386,7 +386,7 @@ static void get_fresh_fd(filecache_t *cache,
     // If we're in saint mode, don't go to the server
     if (pdata != NULL &&
         ((flags & O_TRUNC) || use_local_copy ||
-        (pdata->last_server_update == 0) || (time(NULL) - pdata->last_server_update) <= STAT_CACHE_NEGATIVE_TTL)) {
+        (pdata->last_server_update == 0) || pdata->last_server_update > FILECACHE_INVALIDATED)) {
         const float samplerate = 1.0; // always sample stat
         log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "%s: file is fresh or being truncated: %s::%s", funcname, path, pdata->filename);
@@ -403,7 +403,7 @@ static void get_fresh_fd(filecache_t *cache,
            // I believe we can get here on a PUT, and succeed in getting a previous copy from the cache
            // This will eventually fail if we are already in saint mode, but should succeed for now
            if (use_local_copy) {
-                if (rw ) stats_counter("put_saint_mode_failure", 1, samplerate);
+                if (rw) stats_counter("put_saint_mode_failure", 1, samplerate);
                else stats_counter("get_saint_mode_failure", 1, samplerate);
                log_print(LOG_WARNING, SECTION_FILECACHE_OPEN, "%s: get_saint_mode_failure on file: %s::%s", funcname, path, pdata->filename);
            } else {
@@ -1047,6 +1047,9 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
     // If in saint mode and we count this request as a saint_write in get_fresh_fd(), we would be
     // double counting; but I don't see how we get here at all if we detect saint mode in get_fresh_fd()
     rwp_t rwp = WRITE;
+    GChecksum *sha512_checksum;
+    GChecksum *md5_checksum;
+    FILE *fp;

     BUMP(filecache_return_etag);
@@ -1070,9 +1073,6 @@ static void put_return_etag(const char *path, int fd, char *etag, GError **gerr)
     log_print(LOG_DEBUG, SECTION_FILECACHE_COMM, "%s: file size %d", funcname, st.st_size);

-    GChecksum *sha512_checksum;
-    GChecksum *md5_checksum;
-    FILE *fp;
     fp = fdopen(dup(fd), "r");
     if (!fp) {
         g_set_error(gerr, system_quark(), errno, "%s: NULL fp from fdopen on fd %d for path %s", funcname, fd, path);
@@ -1753,6 +1753,28 @@ void filecache_pdata_move(filecache_t *cache, const char *old_path, const char *
     return;
 }

+// mark filecache stale for path
+void filecache_invalidate(filecache_t* cache, const char* path, GError** gerr) {
+    struct filecache_pdata *pdata = NULL;
+    GError *subgerr = NULL;
+    const char* funcname = "filecache_invalidate";
+
+    pdata = filecache_pdata_get(cache, path, &subgerr);
+    if (subgerr) {
+        g_propagate_prefixed_error(gerr, subgerr, "%s: ", funcname);
+        return;
+    }
+    // We mark the data invalidated by setting the last update very far in the past.
+    // This preserves the etag so we can still try it and get lucky, eg if the file
+    // content was updated, and then updated back to its previous value.
+    pdata->last_server_update = FILECACHE_INVALIDATED;
+    filecache_pdata_set(cache, path, pdata, &subgerr);
+    if (subgerr) {
+        g_propagate_prefixed_error(gerr, subgerr, "%s: ", funcname);
+        return;
+    }
+}
+
 // Does *not* allocate a new string.
 static const char *key2path(const char *key) {
     char *prefix;

diff --git a/src/filecache.h b/src/filecache.h
index b8425070..3ddadb7d 100644
--- a/src/filecache.h
+++ b/src/filecache.h
@@ -37,13 +37,14 @@
 #define E_FC_LDBERR EIO
 #define E_FC_CURLERR ENETDOWN
 #define E_FC_FILETOOLARGE EFBIG
+#define FILECACHE_INVALIDATED 1

 typedef leveldb_t filecache_t;

 void filecache_print_stats(void);
 void filecache_init(char *cache_path, GError **gerr);
 void filecache_delete(filecache_t *cache, const char *path, bool unlink, GError **gerr);
-void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info,
+void filecache_open(char *cache_path, filecache_t *cache, const char *path, struct fuse_file_info *info, bool grace,
                     bool rw, GError **gerr);
 ssize_t filecache_read(struct fuse_file_info *info, char *buf, size_t size, off_t offset, GError **gerr);
 ssize_t filecache_write(struct fuse_file_info *info, const char *buf, size_t size, off_t offset, GError **gerr);
@@ -54,6 +55,7 @@ int filecache_fd(struct fuse_file_info *info);
 void filecache_set_error(struct fuse_file_info *info, int error_code);
 void filecache_forensic_haven(const char *cache_path, filecache_t *cache, const char *path, off_t fsize, GError **gerr);
 void filecache_pdata_move(filecache_t *cache, const char *old_path, const char *new_path, GError **gerr);
+void filecache_invalidate(filecache_t* cache, const char* path, GError** gerr);
 bool filecache_cleanup(filecache_t *cache, const char *cache_path, bool first, GError **gerr);
 struct curl_slist* enhanced_logging(struct curl_slist *slist, int log_level, int section, const char *format, ...);

diff --git a/src/fusedav.c b/src/fusedav.c
index 340a6345..4dff3dc6 100644
--- a/src/fusedav.c
+++ b/src/fusedav.c
@@ -178,7 +178,7 @@ static void getdir_propfind_callback(__unused void *userdata, const char *path,
     struct stat_cache_value *existing = NULL;
     struct stat_cache_value value;
     rwp_t rwp = PROPFIND;
-    GError *subgerr1 = NULL ;
+    GError *subgerr1 = NULL;

     log_print(LOG_INFO, SECTION_FUSEDAV_PROP, "%s: %s (%lu)", funcname, path, status_code);
@@ -420,6 +420,10 @@ static void getdir_propfind_callback(__unused void *userdata, const char *path,
             if (subgerr1) {
                 g_propagate_prefixed_error(gerr, subgerr1, "%s: ", funcname);
             }
+            filecache_invalidate(config->cache, path, &subgerr1);
+            if (subgerr1) {
+                g_propagate_prefixed_error(gerr, subgerr1, "%s: ", funcname);
+            }
         }
     }

From 7c242805497078e64bbfa7f2352f117aef2df78f Mon Sep 17 00:00:00 2001
From: Will Milton
Date: Sun, 4 Aug 2019 16:35:25 +0000
Subject: [PATCH 21/21] check updated children time for parent dir

---
 src/filecache.c | 42 +++++++++++++++++++++++++++++++++++++++---
 src/filecache.h |  2 ++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/filecache.c b/src/filecache.c
index 4e711be5..dbeab24b 100644
--- a/src/filecache.c
+++ b/src/filecache.c
@@ -347,6 +347,28 @@ static size_t write_response_to_fd(void *ptr, size_t size, size_t nmemb, void *u
     return real_size;
 }

+static time_t get_parent_updated_children_time(filecache_t *cache, const char *path, GError **gerr) {
+    const char* funcname = "get_parent_updated_children_time";
+    time_t parent_children_update_ts = 0;
+    char *parent_path;
+    GError *tmpgerr = NULL;
+
+    parent_path = path_parent(path);
+    if (parent_path == NULL) {
+        g_set_error(&tmpgerr, filecache_quark(), EEXIST, "could not get parent path");
+        g_propagate_prefixed_error(gerr, tmpgerr, "%s: ", funcname);
+        return parent_children_update_ts;
+    }
+
+    log_print(LOG_DEBUG,
SECTION_FILECACHE_OPEN, "%s: Getting parent path entry: %s", funcname, parent_path); + parent_children_update_ts = stat_cache_read_updated_children(cache, parent_path, &tmpgerr); + + if (tmpgerr) { + g_propagate_prefixed_error(gerr, tmpgerr, "%s: ", funcname); + } + return parent_children_update_ts; +} + // Get a file descriptor pointing to the latest full copy of the file. static void get_fresh_fd(filecache_t *cache, const char *cache_path, const char *path, struct filecache_sdata *sdata, @@ -365,11 +387,19 @@ static void get_fresh_fd(filecache_t *cache, // Somewhat arbitrary static const unsigned small_time_allotment = 2000; // 2 seconds static const unsigned large_time_allotment = 8000; // 8 seconds + time_t parent_children_update_ts; + time_t check_now = time(NULL); BUMP(filecache_fresh_fd); clock_gettime(CLOCK_MONOTONIC, &start_time); + parent_children_update_ts = get_parent_updated_children_time(cache, path, &tmpgerr); + if (tmpgerr) { + g_propagate_prefixed_error(gerr, tmpgerr, "%s: ", funcname); + return; + } + assert(pdatap); pdata = *pdatap; @@ -382,11 +412,17 @@ static void get_fresh_fd(filecache_t *cache, // For O_TRUNC, we just want to open a truncated cache file and not bother getting a copy from // the server. // If not O_TRUNC, but the cache file is fresh, just reuse it without going to the server. + // If the parent directory was recently updated and the cache file wasn't explicitly invalidated, consider it fresh. // If the file is in-use (last_server_update = 0) we use the local file and don't go to the server. // If we're in saint mode, don't go to the server - if (pdata != NULL && - ((flags & O_TRUNC) || use_local_copy || - (pdata->last_server_update == 0) || pdata->last_server_update > FILECACHE_INVALIDATED)) { + if (pdata != NULL && ( + (flags & O_TRUNC) || use_local_copy || + (pdata->last_server_update == 0) || + ((check_now - pdata->last_server_update) <= FILECACHE_ENTRY_TTL) || + (((check_now - parent_children_update_ts) <= FILECACHE_PARENT_UPDATE_GRACE) + && (pdata->last_server_update > FILECACHE_INVALIDATED)) + ) + ) { const float samplerate = 1.0; // always sample stat log_print(LOG_DEBUG, SECTION_FILECACHE_OPEN, "%s: file is fresh or being truncated: %s::%s", funcname, path, pdata->filename); diff --git a/src/filecache.h b/src/filecache.h index 3ddadb7d..54eab5b7 100644 --- a/src/filecache.h +++ b/src/filecache.h @@ -38,6 +38,8 @@ #define E_FC_CURLERR ENETDOWN #define E_FC_FILETOOLARGE EFBIG #define FILECACHE_INVALIDATED 1 +#define FILECACHE_ENTRY_TTL 2 +#define FILECACHE_PARENT_UPDATE_GRACE 2 typedef leveldb_t filecache_t;
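
Editorial note on the freshness test added above: the compound conditional in get_fresh_fd() now covers five cases. The helper below is not part of the patch; it is a hypothetical restatement of the same decision (it assumes pdata != NULL, exactly as the real condition does, and uses the constants the patch adds to src/filecache.h):

    #include <stdbool.h>
    #include <time.h>

    /* Values introduced in src/filecache.h by these patches. */
    #define FILECACHE_INVALIDATED 1
    #define FILECACHE_ENTRY_TTL 2
    #define FILECACHE_PARENT_UPDATE_GRACE 2

    /* Hypothetical restatement of the cache-freshness decision in get_fresh_fd(). */
    static bool can_serve_from_cache(bool truncating, bool use_local_copy, time_t now,
                                     time_t last_server_update, time_t parent_children_update_ts) {
        if (truncating)                                        /* O_TRUNC: contents are discarded anyway */
            return true;
        if (use_local_copy)                                    /* saint mode: the local copy is all we have */
            return true;
        if (last_server_update == 0)                           /* 0 marks a file currently in use locally */
            return true;
        if (now - last_server_update <= FILECACHE_ENTRY_TTL)   /* fetched from the server very recently */
            return true;
        /* The parent directory saw a PROPFIND within the grace window and this
         * entry was not explicitly invalidated (left at FILECACHE_INVALIDATED). */
        if (now - parent_children_update_ts <= FILECACHE_PARENT_UPDATE_GRACE &&
            last_server_update > FILECACHE_INVALIDATED)
            return true;
        return false;
    }

In the diff itself, `now` is the check_now value captured once at the top of get_fresh_fd(), and parent_children_update_ts comes from the new get_parent_updated_children_time() helper.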