mirror of
https://github.com/haiwen/seafile-server.git
synced 2025-09-25 14:42:52 +00:00
Add --rm-fs option for gc (#541)
* Add --rm-fs option for gc * Use local variables and print fs number * Modify return value to gint64
This commit is contained in:
@@ -12,11 +12,6 @@
|
||||
|
||||
#define MAX_BF_SIZE (((size_t)1) << 29) /* 64 MB */
|
||||
|
||||
/* Total number of blocks to be scanned. */
|
||||
static guint64 total_blocks;
|
||||
static guint64 removed_blocks;
|
||||
static guint64 reachable_blocks;
|
||||
|
||||
/*
|
||||
* The number of bits in the bloom filter is 4 times the number of all blocks.
|
||||
* Let m be the bits in the bf, n be the number of blocks to be added to the bf
|
||||
@@ -36,11 +31,11 @@ static guint64 reachable_blocks;
|
||||
* So we set the minimal size of the bf to 1KB.
|
||||
*/
|
||||
static Bloom *
|
||||
alloc_gc_index ()
|
||||
alloc_gc_index (guint64 total_objs)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
size = (size_t) MAX(total_blocks << 2, 1 << 13);
|
||||
size = (size_t) MAX(total_objs << 2, 1 << 13);
|
||||
size = MIN (size, MAX_BF_SIZE);
|
||||
|
||||
seaf_message ("GC index size is %u Byte.\n", (int)size >> 3);
|
||||
@@ -50,7 +45,8 @@ alloc_gc_index ()
|
||||
|
||||
typedef struct {
|
||||
SeafRepo *repo;
|
||||
Bloom *index;
|
||||
Bloom *blocks_index;
|
||||
Bloom *fs_index;
|
||||
GHashTable *visited;
|
||||
|
||||
/* > 0: keep a period of history;
|
||||
@@ -71,7 +67,7 @@ static int
|
||||
add_blocks_to_index (SeafFSManager *mgr, GCData *data, const char *file_id)
|
||||
{
|
||||
SeafRepo *repo = data->repo;
|
||||
Bloom *index = data->index;
|
||||
Bloom *blocks_index = data->blocks_index;
|
||||
Seafile *seafile;
|
||||
int i;
|
||||
|
||||
@@ -82,7 +78,7 @@ add_blocks_to_index (SeafFSManager *mgr, GCData *data, const char *file_id)
|
||||
}
|
||||
|
||||
for (i = 0; i < seafile->n_blocks; ++i) {
|
||||
bloom_add (index, seafile->blk_sha1s[i]);
|
||||
bloom_add (blocks_index, seafile->blk_sha1s[i]);
|
||||
++data->traversed_blocks;
|
||||
}
|
||||
|
||||
@@ -91,6 +87,16 @@ add_blocks_to_index (SeafFSManager *mgr, GCData *data, const char *file_id)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
add_fs_to_index(GCData *data, const char *file_id)
|
||||
{
|
||||
Bloom *fs_index = data->fs_index;
|
||||
if (fs_index) {
|
||||
bloom_add (fs_index, file_id);
|
||||
}
|
||||
++(data->traversed_fs_objs);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
fs_callback (SeafFSManager *mgr,
|
||||
const char *store_id,
|
||||
@@ -112,7 +118,7 @@ fs_callback (SeafFSManager *mgr,
|
||||
g_hash_table_replace (data->visited, key, key);
|
||||
}
|
||||
|
||||
++(data->traversed_fs_objs);
|
||||
add_fs_to_index(data, obj_id);
|
||||
|
||||
if (type == SEAF_METADATA_TYPE_FILE &&
|
||||
add_blocks_to_index (mgr, data, obj_id) < 0)
|
||||
@@ -169,8 +175,8 @@ traverse_commit (SeafCommit *commit, void *vdata, gboolean *stop)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int
|
||||
populate_gc_index_for_repo (SeafRepo *repo, Bloom *index, int verbose)
|
||||
static gint64
|
||||
populate_gc_index_for_repo (SeafRepo *repo, Bloom *blocks_index, Bloom *fs_index, int verbose)
|
||||
{
|
||||
GList *branches, *ptr;
|
||||
SeafBranch *branch;
|
||||
@@ -190,7 +196,8 @@ populate_gc_index_for_repo (SeafRepo *repo, Bloom *index, int verbose)
|
||||
|
||||
data = g_new0(GCData, 1);
|
||||
data->repo = repo;
|
||||
data->index = index;
|
||||
data->blocks_index = blocks_index;
|
||||
data->fs_index = fs_index;
|
||||
data->visited = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
|
||||
data->verbose = verbose;
|
||||
|
||||
@@ -232,7 +239,7 @@ populate_gc_index_for_repo (SeafRepo *repo, Bloom *index, int verbose)
|
||||
|
||||
seaf_message ("Traversed %d commits, %"G_GINT64_FORMAT" blocks.\n",
|
||||
data->traversed_commits, data->traversed_blocks);
|
||||
reachable_blocks += data->traversed_blocks;
|
||||
ret = data->traversed_blocks;
|
||||
|
||||
g_list_free (branches);
|
||||
g_hash_table_destroy (data->visited);
|
||||
@@ -244,6 +251,7 @@ populate_gc_index_for_repo (SeafRepo *repo, Bloom *index, int verbose)
|
||||
typedef struct {
|
||||
Bloom *index;
|
||||
int dry_run;
|
||||
guint64 removed_blocks;
|
||||
} CheckBlocksData;
|
||||
|
||||
static gboolean
|
||||
@@ -254,7 +262,7 @@ check_block_liveness (const char *store_id, int version,
|
||||
Bloom *index = data->index;
|
||||
|
||||
if (!bloom_test (index, block_id)) {
|
||||
++removed_blocks;
|
||||
data->removed_blocks++;
|
||||
if (!data->dry_run)
|
||||
seaf_block_manager_remove_block (seaf->block_mgr,
|
||||
store_id, version,
|
||||
@@ -264,13 +272,52 @@ check_block_liveness (const char *store_id, int version,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int
|
||||
populate_gc_index_for_virtual_repos (SeafRepo *repo, Bloom *index, int verbose)
|
||||
#define MAX_THREADS 10
|
||||
|
||||
static gint64
|
||||
check_existing_fs (char *store_id, int repo_version, GHashTable *exist_fs,
|
||||
Bloom *fs_index, int dry_run)
|
||||
{
|
||||
GHashTableIter iter;
|
||||
gpointer key, value;
|
||||
gint64 ret = 0;
|
||||
|
||||
g_hash_table_iter_init (&iter, exist_fs);
|
||||
|
||||
while (g_hash_table_iter_next (&iter, &key, &value)) {
|
||||
if (!bloom_test (fs_index, (char *)key)) {
|
||||
ret++;
|
||||
if (dry_run)
|
||||
continue;
|
||||
seaf_fs_manager_delete_object(seaf->fs_mgr,
|
||||
store_id, repo_version,
|
||||
(char *)key);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
collect_exist_fs (const char *store_id, int version,
|
||||
const char *fs_id, void *vdata)
|
||||
{
|
||||
GHashTable *exist_fs = vdata;
|
||||
int dummy;
|
||||
|
||||
g_hash_table_replace (exist_fs, g_strdup (fs_id), &dummy);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gint64
|
||||
populate_gc_index_for_virtual_repos (SeafRepo *repo, Bloom *blocks_index, Bloom *fs_index, int verbose)
|
||||
{
|
||||
GList *vrepo_ids = NULL, *ptr;
|
||||
char *repo_id;
|
||||
SeafRepo *vrepo;
|
||||
int ret = 0;
|
||||
gint64 ret = 0;
|
||||
gint64 scan_ret = 0;
|
||||
|
||||
vrepo_ids = seaf_repo_manager_get_virtual_repo_ids_by_origin (seaf->repo_mgr,
|
||||
repo->id);
|
||||
@@ -283,26 +330,35 @@ populate_gc_index_for_virtual_repos (SeafRepo *repo, Bloom *index, int verbose)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = populate_gc_index_for_repo (vrepo, index, verbose);
|
||||
scan_ret = populate_gc_index_for_repo (vrepo, blocks_index, fs_index, verbose);
|
||||
seaf_repo_unref (vrepo);
|
||||
if (ret < 0)
|
||||
if (scan_ret < 0) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
ret += scan_ret;
|
||||
}
|
||||
|
||||
out:
|
||||
string_list_free (vrepo_ids);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
gc_v1_repo (SeafRepo *repo, int dry_run, int verbose)
|
||||
gint64
|
||||
gc_v1_repo (SeafRepo *repo, int dry_run, int verbose, int rm_fs)
|
||||
{
|
||||
Bloom *index;
|
||||
int ret;
|
||||
Bloom *blocks_index = NULL;
|
||||
Bloom *fs_index = NULL;
|
||||
GHashTable *exist_fs = NULL;
|
||||
guint64 total_blocks;
|
||||
guint64 removed_blocks;
|
||||
guint64 reachable_blocks;
|
||||
guint64 total_fs = 0;
|
||||
gint64 removed_fs = 0;
|
||||
gint64 ret;
|
||||
|
||||
total_blocks = seaf_block_manager_get_block_number (seaf->block_mgr,
|
||||
repo->store_id, repo->version);
|
||||
removed_blocks = 0;
|
||||
reachable_blocks = 0;
|
||||
|
||||
if (total_blocks == 0) {
|
||||
@@ -310,6 +366,24 @@ gc_v1_repo (SeafRepo *repo, int dry_run, int verbose)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rm_fs) {
|
||||
exist_fs = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
|
||||
ret = seaf_obj_store_foreach_obj (seaf->fs_mgr->obj_store,
|
||||
repo->store_id, repo->version,
|
||||
collect_exist_fs,
|
||||
exist_fs);
|
||||
if (ret < 0) {
|
||||
seaf_warning ("Failed to collect existing fs for repo %.8s, stop GC.\n\n",
|
||||
repo->id);
|
||||
goto out;
|
||||
}
|
||||
|
||||
total_fs = g_hash_table_size (exist_fs);
|
||||
}
|
||||
|
||||
if (rm_fs)
|
||||
seaf_message ("GC started. Total block number is %"G_GUINT64_FORMAT", total fs number is %"G_GUINT64_FORMAT".\n", total_blocks, total_fs);
|
||||
else
|
||||
seaf_message ("GC started. Total block number is %"G_GUINT64_FORMAT".\n", total_blocks);
|
||||
|
||||
/*
|
||||
@@ -318,33 +392,49 @@ gc_v1_repo (SeafRepo *repo, int dry_run, int verbose)
|
||||
* may skip some garbage blocks, but we won't delete
|
||||
* blocks that are still alive.
|
||||
*/
|
||||
index = alloc_gc_index ();
|
||||
if (!index) {
|
||||
seaf_warning ("GC: Failed to allocate index.\n");
|
||||
return -1;
|
||||
blocks_index = alloc_gc_index (total_blocks);
|
||||
if (!blocks_index) {
|
||||
seaf_warning ("GC: Failed to allocate blocks_index.\n");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rm_fs && total_fs > 0) {
|
||||
fs_index = alloc_gc_index (total_fs);
|
||||
if (!fs_index) {
|
||||
seaf_warning ("GC: Failed to allocate fs index for repo %.8s, stop gc.\n",
|
||||
repo->id);
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
seaf_message ("Populating index.\n");
|
||||
|
||||
ret = populate_gc_index_for_repo (repo, index, verbose);
|
||||
ret = populate_gc_index_for_repo (repo, blocks_index, fs_index, verbose);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
reachable_blocks += ret;
|
||||
|
||||
/* Since virtual repos share fs and block store with the origin repo,
|
||||
* it's necessary to do GC for them together.
|
||||
*/
|
||||
ret = populate_gc_index_for_virtual_repos (repo, index, verbose);
|
||||
ret = populate_gc_index_for_virtual_repos (repo, blocks_index, fs_index, verbose);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
reachable_blocks += ret;
|
||||
|
||||
if (!dry_run)
|
||||
seaf_message ("Scanning and deleting unused blocks.\n");
|
||||
else
|
||||
seaf_message ("Scanning unused blocks.\n");
|
||||
|
||||
CheckBlocksData data;
|
||||
data.index = index;
|
||||
data.index = blocks_index;
|
||||
data.dry_run = dry_run;
|
||||
data.removed_blocks = 0;
|
||||
|
||||
ret = seaf_block_manager_foreach_block (seaf->block_mgr,
|
||||
repo->store_id, repo->version,
|
||||
@@ -355,23 +445,53 @@ gc_v1_repo (SeafRepo *repo, int dry_run, int verbose)
|
||||
goto out;
|
||||
}
|
||||
|
||||
removed_blocks = data.removed_blocks;
|
||||
ret = removed_blocks;
|
||||
|
||||
if (!dry_run)
|
||||
if (rm_fs && total_fs > 0) {
|
||||
removed_fs = check_existing_fs(repo->store_id, repo->version, exist_fs,
|
||||
fs_index, dry_run);
|
||||
if (removed_fs < 0) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (!dry_run) {
|
||||
if (rm_fs)
|
||||
seaf_message ("GC finished for repo %.8s. %"G_GUINT64_FORMAT" blocks total, "
|
||||
"about %"G_GUINT64_FORMAT" reachable blocks, "
|
||||
"%"G_GUINT64_FORMAT" blocks are removed. "
|
||||
"%"G_GUINT64_FORMAT" fs are removed.\n",
|
||||
repo->id, total_blocks, reachable_blocks, removed_blocks, removed_fs);
|
||||
else
|
||||
seaf_message ("GC finished. %"G_GUINT64_FORMAT" blocks total, "
|
||||
"about %"G_GUINT64_FORMAT" reachable blocks, "
|
||||
"%"G_GUINT64_FORMAT" blocks are removed.\n",
|
||||
total_blocks, reachable_blocks, removed_blocks);
|
||||
} else {
|
||||
if (rm_fs)
|
||||
seaf_message ("GC finished for repo %.8s. %"G_GUINT64_FORMAT" blocks total, "
|
||||
"about %"G_GUINT64_FORMAT" reachable blocks, "
|
||||
"%"G_GUINT64_FORMAT" blocks can be removed. "
|
||||
"%"G_GUINT64_FORMAT" fs can be removed.\n",
|
||||
repo->id, total_blocks, reachable_blocks, removed_blocks, removed_fs);
|
||||
else
|
||||
seaf_message ("GC finished. %"G_GUINT64_FORMAT" blocks total, "
|
||||
"about %"G_GUINT64_FORMAT" reachable blocks, "
|
||||
"%"G_GUINT64_FORMAT" blocks can be removed.\n",
|
||||
total_blocks, reachable_blocks, removed_blocks);
|
||||
}
|
||||
|
||||
out:
|
||||
printf ("\n");
|
||||
|
||||
bloom_destroy (index);
|
||||
if (exist_fs)
|
||||
g_hash_table_destroy (exist_fs);
|
||||
|
||||
if (blocks_index)
|
||||
bloom_destroy (blocks_index);
|
||||
if (fs_index)
|
||||
bloom_destroy (fs_index);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -406,14 +526,14 @@ delete_garbaged_repos (int dry_run)
|
||||
}
|
||||
|
||||
int
|
||||
gc_core_run (GList *repo_id_list, int dry_run, int verbose)
|
||||
gc_core_run (GList *repo_id_list, int dry_run, int verbose, int rm_fs)
|
||||
{
|
||||
GList *ptr;
|
||||
SeafRepo *repo;
|
||||
GList *corrupt_repos = NULL;
|
||||
GList *del_block_repos = NULL;
|
||||
gboolean del_garbage = FALSE;
|
||||
int gc_ret;
|
||||
gint64 gc_ret;
|
||||
char *repo_id;
|
||||
|
||||
if (repo_id_list == NULL) {
|
||||
@@ -438,7 +558,7 @@ gc_core_run (GList *repo_id_list, int dry_run, int verbose)
|
||||
if (!repo->is_virtual) {
|
||||
seaf_message ("GC version %d repo %s(%s)\n",
|
||||
repo->version, repo->name, repo->id);
|
||||
gc_ret = gc_v1_repo (repo, dry_run, verbose);
|
||||
gc_ret = gc_v1_repo (repo, dry_run, verbose, rm_fs);
|
||||
if (gc_ret < 0) {
|
||||
corrupt_repos = g_list_prepend (corrupt_repos, g_strdup(repo->id));
|
||||
} else if (dry_run && gc_ret) {
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#ifndef GC_CORE_H
|
||||
#define GC_CORE_H
|
||||
|
||||
int gc_core_run (GList *repo_id_list, int dry_run, int verbose);
|
||||
int gc_core_run (GList *repo_id_list, int dry_run, int verbose, int rm_fs);
|
||||
|
||||
void
|
||||
delete_garbaged_repos (int dry_run);
|
||||
|
@@ -15,7 +15,7 @@ static char *central_config_dir = NULL;
|
||||
|
||||
SeafileSession *seaf;
|
||||
|
||||
static const char *short_opts = "hvc:d:VDrF:";
|
||||
static const char *short_opts = "hvc:d:VDrRF:";
|
||||
static const struct option long_opts[] = {
|
||||
{ "help", no_argument, NULL, 'h', },
|
||||
{ "version", no_argument, NULL, 'v', },
|
||||
@@ -25,6 +25,7 @@ static const struct option long_opts[] = {
|
||||
{ "verbose", no_argument, NULL, 'V' },
|
||||
{ "dry-run", no_argument, NULL, 'D' },
|
||||
{ "rm-deleted", no_argument, NULL, 'r' },
|
||||
{ "rm-fs", no_argument, NULL, 'R' },
|
||||
{ 0, 0, 0, 0 },
|
||||
};
|
||||
|
||||
@@ -35,6 +36,7 @@ static void usage ()
|
||||
"[repo_id_1 [repo_id_2 ...]]\n"
|
||||
"Additional options:\n"
|
||||
"-r, --rm-deleted: remove garbaged repos\n"
|
||||
"-R, --rm-fs: remove fs object\n"
|
||||
"-D, --dry-run: report blocks that can be remove, but not remove them\n"
|
||||
"-V, --verbose: verbose output messages\n");
|
||||
}
|
||||
@@ -72,6 +74,7 @@ main(int argc, char *argv[])
|
||||
int verbose = 0;
|
||||
int dry_run = 0;
|
||||
int rm_garbage = 0;
|
||||
int rm_fs = 0;
|
||||
|
||||
#ifdef WIN32
|
||||
argv = get_argv_utf8 (&argc);
|
||||
@@ -106,6 +109,9 @@ main(int argc, char *argv[])
|
||||
case 'r':
|
||||
rm_garbage = 1;
|
||||
break;
|
||||
case 'R':
|
||||
rm_fs = 1;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
exit(-1);
|
||||
@@ -140,7 +146,7 @@ main(int argc, char *argv[])
|
||||
for (i = optind; i < argc; i++)
|
||||
repo_id_list = g_list_append (repo_id_list, g_strdup(argv[i]));
|
||||
|
||||
gc_core_run (repo_id_list, dry_run, verbose);
|
||||
gc_core_run (repo_id_list, dry_run, verbose, rm_fs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user