summaryrefslogtreecommitdiffstats
path: root/usr.bin
diff options
context:
space:
mode:
authorkientzle <kientzle@FreeBSD.org>2004-04-15 22:37:54 +0000
committerkientzle <kientzle@FreeBSD.org>2004-04-15 22:37:54 +0000
commit2b19543d6c25a0cb41471328b2231cf8b28a08a2 (patch)
tree63854c015a3c1c11aa0d4ca3483b033926773747 /usr.bin
parent7d7307d2e1689754b4def7b381a46fd20919d442 (diff)
downloadFreeBSD-src-2b19543d6c25a0cb41471328b2231cf8b28a08a2.zip
FreeBSD-src-2b19543d6c25a0cb41471328b2231cf8b28a08a2.tar.gz
As suggested by Julian Elischer, use a self-sizing hash
table for the hardlink cache. This dramatically improves performance when archiving millions of hardlinked files. While I'm here, clean up some style bugs (per Bruce Evans) and clarify some comments.
Diffstat (limited to 'usr.bin')
-rw-r--r--usr.bin/tar/bsdtar.c12
-rw-r--r--usr.bin/tar/bsdtar.h52
-rw-r--r--usr.bin/tar/util.c36
-rw-r--r--usr.bin/tar/write.c308
4 files changed, 279 insertions, 129 deletions
diff --git a/usr.bin/tar/bsdtar.c b/usr.bin/tar/bsdtar.c
index 690270f..dcdafff 100644
--- a/usr.bin/tar/bsdtar.c
+++ b/usr.bin/tar/bsdtar.c
@@ -115,7 +115,6 @@ main(int argc, char **argv)
struct bsdtar *bsdtar, bsdtar_storage;
struct passwd *pwent;
int opt;
- int i;
char mode;
char buff[16];
@@ -391,16 +390,7 @@ main(int argc, char **argv)
if (bsdtar->user_uname != NULL)
free(bsdtar->user_uname);
- for (i = 0; i < bsdtar_hash_size; i++) {
- if (bsdtar->uname_lookup[i].uname != NULL)
- free(bsdtar->uname_lookup[i].uname);
- }
-
- for (i = 0; i < bsdtar_hash_size; i++) {
- if (bsdtar->gname_lookup[i].gname != NULL)
- free(bsdtar->gname_lookup[i].gname);
- }
-
+ cleanup_exclusions(bsdtar);
return 0;
}
diff --git a/usr.bin/tar/bsdtar.h b/usr.bin/tar/bsdtar.h
index f11f451..4b1ef32 100644
--- a/usr.bin/tar/bsdtar.h
+++ b/usr.bin/tar/bsdtar.h
@@ -32,15 +32,13 @@
#include <archive.h>
#include <stdio.h>
-/* Data for exclusion/inclusion handling: defined in matching.c */
-struct matching;
-struct links_entry;
-struct archive_dir_entry;
-
/*
- * The internal state for the "bsdtar" program. This is registered
- * with the 'archive' structure so that this information will be
- * available to the read/write callbacks.
+ * The internal state for the "bsdtar" program.
+ *
+ * Keeping all of the state in a structure like this simplifies memory
+ * leak testing (at exit, anything left on the heap is suspect). A
+ * pointer to this structure is passed to most bsdtar internal
+ * functions.
*/
struct bsdtar {
/* Options */
@@ -66,31 +64,22 @@ struct bsdtar {
int fd;
/* Miscellaneous state information */
- size_t u_width; /* for 'list_item' */
- size_t gs_width; /* For 'list_item' */
- char *user_uname; /* User running this program */
- uid_t user_uid; /* UID running this program */
int argc;
char **argv;
+ size_t gs_width; /* For 'list_item' in read.c */
+ size_t u_width; /* for 'list_item' in read.c */
+ char *user_uname; /* User running this program */
+ uid_t user_uid; /* UID running this program */
- struct matching *matching;
-
- struct links_entry *links_head;
- struct archive_dir_entry *archive_dir_head, *archive_dir_tail;
-
- /* An arbitrary prime number. */
- #define bsdtar_hash_size 71
- /* A simple hash of uid/uname for caching uname lookups. */
- struct {
- uid_t uid;
- char *uname;
- } uname_lookup[bsdtar_hash_size];
-
- /* A simple hash of gid/gname for caching gname lookups. */
- struct {
- gid_t gid;
- char *gname;
- } gname_lookup[bsdtar_hash_size];
+ /*
+ * Data for various subsystems. Full definitions are located in
+ * the file where they are used.
+ */
+ struct archive_dir *archive_dir; /* for write.c */
+ struct gname_cache *gname_cache; /* for write.c */
+ struct links_cache *links_cache; /* for write.c */
+ struct matching *matching; /* for matching.c */
+ struct uname_cache *uname_cache; /* for write.c */
};
const char *bsdtar_progname(void);
@@ -100,15 +89,12 @@ void cleanup_exclusions(struct bsdtar *);
void exclude(struct bsdtar *, const char *pattern);
int excluded(struct bsdtar *, const char *pathname);
void include(struct bsdtar *, const char *pattern);
-
void safe_fprintf(FILE *, const char *fmt, ...);
-
void tar_mode_c(struct bsdtar *bsdtar);
void tar_mode_r(struct bsdtar *bsdtar);
void tar_mode_t(struct bsdtar *bsdtar);
void tar_mode_u(struct bsdtar *bsdtar);
void tar_mode_x(struct bsdtar *bsdtar);
-
int unmatched_inclusions(struct bsdtar *bsdtar);
void usage(void);
int yes(const char *fmt, ...);
diff --git a/usr.bin/tar/util.c b/usr.bin/tar/util.c
index 66e356d..441fab2 100644
--- a/usr.bin/tar/util.c
+++ b/usr.bin/tar/util.c
@@ -45,27 +45,33 @@ void
safe_fprintf(FILE *f, const char *fmt, ...)
{
char *buff;
- char *buffheap;
- char buffstack[256];
- int bufflength;
+ char *buff_heap;
+ int buff_length;
int length;
va_list ap;
char *p;
+ char buff_stack[256];
- /* Use a stack-allocated buffer if we can. */
- buffheap = NULL;
- bufflength = 256;
- buff = buffstack;
+ /* Use a stack-allocated buffer if we can, for speed and safety. */
+ buff_heap = NULL;
+ buff_length = sizeof(buff_stack);
+ buff = buff_stack;
va_start(ap, fmt);
- length = vsnprintf(buff, bufflength, fmt, ap);
+ length = vsnprintf(buff, buff_length, fmt, ap);
+ va_end(ap);
/* If the result is too large, allocate a buffer on the heap. */
- if (length >= bufflength) {
- bufflength = length+1;
- buff = buffheap = malloc(bufflength);
- length = vsnprintf(buff, bufflength, fmt, ap);
+ if (length >= buff_length) {
+ buff_length = length+1;
+ buff_heap = malloc(buff_length);
+ /* Failsafe: use the truncated string if malloc fails. */
+ if (buff_heap != NULL) {
+ buff = buff_heap;
+ va_start(ap, fmt);
+ length = vsnprintf(buff, buff_length, fmt, ap);
+ va_end(ap);
+ }
}
- va_end(ap);
for (p=buff; *p != '\0'; p++) {
unsigned char c = *p;
@@ -89,8 +95,8 @@ safe_fprintf(FILE *f, const char *fmt, ...)
}
}
/* If we allocated a heap-based buffer, free it now. */
- if (buffheap != NULL)
- free(buffheap);
+ if (buff_heap != NULL)
+ free(buff_heap);
}
static void
diff --git a/usr.bin/tar/write.c b/usr.bin/tar/write.c
index 2d418da..2110bbb 100644
--- a/usr.bin/tar/write.c
+++ b/usr.bin/tar/write.c
@@ -48,6 +48,38 @@ __FBSDID("$FreeBSD$");
#include "bsdtar.h"
+/* Fixed size of uname/gname cache. */
+#define gname_cache_size 101
+#define uname_cache_size 101
+
+/* Initial size of link cache. */
+#define links_cache_initial_size 1024
+
+struct archive_dir_entry {
+ struct archive_dir_entry *next;
+ time_t mtime_sec;
+ int mtime_nsec;
+ char *name;
+};
+
+struct archive_dir {
+ struct archive_dir_entry *head, *tail;
+};
+
+struct gname_cache {
+ struct {
+ gid_t id;
+ char *name;
+ } cache[gname_cache_size];
+};
+
+struct links_cache {
+ unsigned long number_entries;
+ size_t number_buckets;
+ struct links_entry **buckets;
+ char stop_allocating;
+};
+
struct links_entry {
struct links_entry *next;
struct links_entry *previous;
@@ -57,30 +89,30 @@ struct links_entry {
char *name;
};
-struct archive_dir_entry {
- struct archive_dir_entry *next;
- time_t mtime_sec;
- int mtime_nsec;
- char *name;
-};
+struct uname_cache {
+ struct {
+ uid_t id;
+ char *name;
+ } cache[uname_cache_size];
+};
static void add_dir_list(struct bsdtar *bsdtar, const char *path,
time_t mtime_sec, int mtime_nsec);
-void archive_names_from_file(struct bsdtar *bsdtar,
+static void archive_names_from_file(struct bsdtar *bsdtar,
struct archive *a);
static void create_cleanup(struct bsdtar *);
static int append_archive(struct bsdtar *, struct archive *,
const char *fname);
static const char * lookup_gname(struct bsdtar *bsdtar, gid_t gid);
+static void lookup_hardlink(struct bsdtar *,
+ struct archive_entry *entry, const struct stat *);
static const char * lookup_uname(struct bsdtar *bsdtar, uid_t uid);
static int new_enough(struct bsdtar *, const char *path,
time_t mtime_sec, int mtime_nsec);
-static void record_hardlink(struct bsdtar *,
- struct archive_entry *entry, const struct stat *);
-void setup_acls(struct bsdtar *, struct archive_entry *,
+static void setup_acls(struct bsdtar *, struct archive_entry *,
const char *path);
-void test_for_append(struct bsdtar *);
+static void test_for_append(struct bsdtar *);
static void write_archive(struct archive *, struct bsdtar *);
static void write_entry(struct bsdtar *, struct archive *,
struct stat *, const char *pathname,
@@ -204,6 +236,10 @@ tar_mode_u(struct bsdtar *bsdtar)
const char *filename;
int format;
struct archive_dir_entry *p;
+ struct archive_dir archive_dir;
+
+ bsdtar->archive_dir = &archive_dir;
+ memset(&archive_dir, 0, sizeof(archive_dir));
filename = NULL;
format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED;
@@ -261,13 +297,13 @@ tar_mode_u(struct bsdtar *bsdtar)
close(bsdtar->fd);
bsdtar->fd = -1;
- while (bsdtar->archive_dir_head != NULL) {
- p = bsdtar->archive_dir_head->next;
- free(bsdtar->archive_dir_head->name);
- free(bsdtar->archive_dir_head);
- bsdtar->archive_dir_head = p;
+ while (bsdtar->archive_dir->head != NULL) {
+ p = bsdtar->archive_dir->head->next;
+ free(bsdtar->archive_dir->head->name);
+ free(bsdtar->archive_dir->head);
+ bsdtar->archive_dir->head = p;
}
- bsdtar->archive_dir_tail = NULL;
+ bsdtar->archive_dir->tail = NULL;
}
@@ -645,7 +681,7 @@ write_entry(struct bsdtar *bsdtar, struct archive *a, struct stat *st,
/* If there are hard links, record it for later use */
if (!S_ISDIR(st->st_mode) && (st->st_nlink > 1))
- record_hardlink(bsdtar, entry, st);
+ lookup_hardlink(bsdtar, entry, st);
/* Non-regular files get archived with zero size. */
if (!S_ISREG(st->st_mode))
@@ -778,34 +814,132 @@ write_file_data(struct archive *a, int fd)
static void
create_cleanup(struct bsdtar * bsdtar)
{
- /* Free inode->name map */
- while (bsdtar->links_head != NULL) {
- struct links_entry *lp = bsdtar->links_head->next;
-
- if (bsdtar->option_warn_links)
- bsdtar_warnc(0, "Missing links to %s",
- bsdtar->links_head->name);
-
- if (bsdtar->links_head->name != NULL)
- free(bsdtar->links_head->name);
- free(bsdtar->links_head);
- bsdtar->links_head = lp;
+ struct links_cache *links_cache;
+ struct uname_cache *ucache;
+ struct gname_cache *gcache;
+ size_t i;
+
+
+ /* Free inode->pathname map used for hardlink detection. */
+ if (bsdtar->links_cache != NULL) {
+ links_cache = bsdtar->links_cache;
+ if (links_cache->buckets != NULL) {
+ for (i = 0; i < links_cache->number_buckets; i++) {
+ while (links_cache->buckets[i] != NULL) {
+ struct links_entry *lp =
+ links_cache->buckets[i]->next;
+ if (bsdtar->option_warn_links)
+ bsdtar_warnc(0,
+ "Missing links to %s",
+ links_cache->buckets[i]->name);
+ if (links_cache->buckets[i]->name != NULL)
+ free(links_cache->buckets[i]->name);
+ free(links_cache->buckets[i]);
+ links_cache->buckets[i] = lp;
+ }
+ }
+ free(links_cache->buckets);
+ }
+ free(bsdtar->links_cache);
+ bsdtar->links_cache = NULL;
+ }
+
+ if (bsdtar->uname_cache != NULL) {
+ ucache = bsdtar->uname_cache;
+ for(i = 0; i < uname_cache_size; i++) {
+ if (ucache->cache[i].name != NULL)
+ free(ucache->cache[i].name);
+ }
+ free(ucache);
+ bsdtar->uname_cache = NULL;
+ }
+
+ if (bsdtar->gname_cache != NULL) {
+ gcache = bsdtar->gname_cache;
+ for(i = 0; i < gname_cache_size; i++) {
+ if (gcache->cache[i].name != NULL)
+ free(gcache->cache[i].name);
+ }
+ free(gcache);
+ bsdtar->gname_cache = NULL;
}
- cleanup_exclusions(bsdtar);
}
static void
-record_hardlink(struct bsdtar *bsdtar, struct archive_entry *entry,
+lookup_hardlink(struct bsdtar *bsdtar, struct archive_entry *entry,
const struct stat *st)
{
- struct links_entry *le;
+ struct links_cache *links_cache;
+ struct links_entry *le, **new_buckets;
+ int hash;
+ size_t i, new_size;
+
+ /* If necessary, initialize the links cache. */
+ links_cache = bsdtar->links_cache;
+ if (links_cache == NULL) {
+ bsdtar->links_cache = malloc(sizeof(struct links_cache));
+ if (bsdtar->links_cache == NULL)
+ bsdtar_errc(1, ENOMEM,
+ "No memory for hardlink detection.");
+ links_cache = bsdtar->links_cache;
+ memset(links_cache, 0, sizeof(struct links_cache));
+ links_cache->number_buckets = links_cache_initial_size;
+ links_cache->buckets = malloc(links_cache->number_buckets *
+ sizeof(links_cache->buckets[0]));
+ if (links_cache->buckets == NULL) {
+ bsdtar_errc(1, ENOMEM,
+ "No memory for hardlink detection.");
+ }
+ for (i = 0; i < links_cache->number_buckets; i++)
+ links_cache->buckets[i] = NULL;
+ }
- /*
- * First look in the list of multiply-linked files. If we've
- * already dumped it, convert this entry to a hard link entry.
- */
- for (le = bsdtar->links_head; le != NULL; le = le->next) {
+ /* If the links cache is getting too full, enlarge the hash table. */
+ if (links_cache->number_entries > links_cache->number_buckets * 2 &&
+ !links_cache->stop_allocating)
+ {
+ int count;
+
+ new_size = links_cache->number_buckets * 2;
+ new_buckets = malloc(new_size * sizeof(struct links_entry *));
+
+ count = 0;
+
+ if (new_buckets != NULL) {
+ memset(new_buckets, 0,
+ new_size * sizeof(struct links_entry *));
+ for (i = 0; i < links_cache->number_buckets; i++) {
+ while (links_cache->buckets[i] != NULL) {
+ /* Remove entry from old bucket. */
+ le = links_cache->buckets[i];
+ links_cache->buckets[i] = le->next;
+
+ /* Add entry to new bucket. */
+ hash = (le->dev ^ le->ino) % new_size;
+
+ if (new_buckets[hash] != NULL)
+ new_buckets[hash]->previous =
+ le;
+ le->next = new_buckets[hash];
+ le->previous = NULL;
+ new_buckets[hash] = le;
+ }
+ }
+ free(links_cache->buckets);
+ links_cache->buckets = new_buckets;
+ links_cache->number_buckets = new_size;
+ } else {
+ links_cache->stop_allocating = 1;
+ bsdtar_warnc(ENOMEM, "No more memory for recording "
+ "hard links; Remaining hard links will be "
+ "dumped as full files.");
+ }
+ }
+
+ /* Try to locate this entry in the links cache. */
+ hash = ( st->st_dev ^ st->st_ino ) % links_cache->number_buckets;
+ for (le = links_cache->buckets[hash]; le != NULL; le = le->next) {
if (le->dev == st->st_dev && le->ino == st->st_ino) {
archive_entry_copy_hardlink(entry, le->name);
@@ -822,8 +956,9 @@ record_hardlink(struct bsdtar *bsdtar, struct archive_entry *entry,
le->next->previous = le->previous;
if (le->name != NULL)
free(le->name);
- if (bsdtar->links_head == le)
- bsdtar->links_head = le->next;
+ if (links_cache->buckets[hash] == le)
+ links_cache->buckets[hash] = le->next;
+ links_cache->number_entries--;
free(le);
}
@@ -831,12 +966,24 @@ record_hardlink(struct bsdtar *bsdtar, struct archive_entry *entry,
}
}
+ if (links_cache->stop_allocating)
+ return;
+
+ /* Add this entry to the links cache. */
le = malloc(sizeof(struct links_entry));
- if (bsdtar->links_head != NULL)
- bsdtar->links_head->previous = le;
- le->next = bsdtar->links_head;
+ if (le == NULL) {
+ links_cache->stop_allocating = 1;
+ bsdtar_warnc(ENOMEM, "No more memory for recording "
+ "hard links; Remaining hard links will be dumped "
+ "as full files.");
+ return;
+ }
+ if (links_cache->buckets[hash] != NULL)
+ links_cache->buckets[hash]->previous = le;
+ links_cache->number_entries++;
+ le->next = links_cache->buckets[hash];
le->previous = NULL;
- bsdtar->links_head = le;
+ links_cache->buckets[hash] = le;
le->dev = st->st_dev;
le->ino = st->st_ino;
le->links = st->st_nlink - 1;
@@ -856,9 +1003,10 @@ setup_acls(struct bsdtar *bsdtar, struct archive_entry *entry,
setup_acl(bsdtar, entry, accpath,
ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
- /* XXX Don't bother with default for non-directories. XXX */
- setup_acl(bsdtar, entry, accpath,
- ACL_TYPE_DEFAULT, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
+ /* Only directories can have default ACLs. */
+ if (S_ISDIR(archive_entry_mode(entry)))
+ setup_acl(bsdtar, entry, accpath,
+ ACL_TYPE_DEFAULT, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
}
void
@@ -939,15 +1087,24 @@ const char *
lookup_uname(struct bsdtar *bsdtar, uid_t uid)
{
struct passwd *pwent;
+ struct uname_cache *cache;
int slot;
- slot = uid % bsdtar_hash_size;
- if (bsdtar->uname_lookup[slot].uname != NULL) {
- if (bsdtar->uname_lookup[slot].uid == uid)
- return (bsdtar->uname_lookup[slot].uname);
- free(bsdtar->uname_lookup[slot].uname);
- bsdtar->uname_lookup[slot].uname = NULL;
+ if (bsdtar->uname_cache == NULL) {
+ bsdtar->uname_cache = malloc(sizeof(struct uname_cache));
+ memset(bsdtar->uname_cache, 0, sizeof(struct uname_cache));
+ }
+
+ cache = bsdtar->uname_cache;
+
+ slot = uid % uname_cache_size;
+ if (cache->cache[slot].name != NULL) {
+ if (cache->cache[slot].id == uid)
+ return (cache->cache[slot].name);
+
+ free(cache->cache[slot].name);
+ cache->cache[slot].name = NULL;
}
pwent = getpwuid(uid);
@@ -956,8 +1113,8 @@ lookup_uname(struct bsdtar *bsdtar, uid_t uid)
bsdtar_warnc(errno, "getpwuid(%d) failed", uid);
return (NULL);
} else if (pwent->pw_name != NULL && pwent->pw_name[0] != '\0') {
- bsdtar->uname_lookup[slot].uname = strdup(pwent->pw_name);
- bsdtar->uname_lookup[slot].uid = uid;
+ cache->cache[slot].name = strdup(pwent->pw_name);
+ cache->cache[slot].id = uid;
}
return (NULL);
}
@@ -966,15 +1123,21 @@ const char *
lookup_gname(struct bsdtar *bsdtar, gid_t gid)
{
struct group *grent;
+ struct gname_cache *cache;
int slot;
- slot = gid % bsdtar_hash_size;
- if (bsdtar->gname_lookup[slot].gname != NULL) {
- if (bsdtar->gname_lookup[slot].gid == gid)
- return (bsdtar->gname_lookup[slot].gname);
+ if (bsdtar->gname_cache == NULL) {
+ bsdtar->gname_cache = malloc(sizeof(struct gname_cache));
+ memset(bsdtar->gname_cache, 0, sizeof(struct gname_cache));
+ }
- free(bsdtar->gname_lookup[slot].gname);
- bsdtar->gname_lookup[slot].gname = NULL;
+ cache = bsdtar->gname_cache;
+ slot = gid % gname_cache_size;
+ if (cache->cache[slot].name != NULL) {
+ if (cache->cache[slot].id == gid)
+ return (cache->cache[slot].name);
+ free(cache->cache[slot].name);
+ cache->cache[slot].name = NULL;
}
grent = getgrgid(gid);
@@ -983,8 +1146,8 @@ lookup_gname(struct bsdtar *bsdtar, gid_t gid)
bsdtar_warnc(errno, "getgrgid(%d) failed", gid);
return (NULL);
} else if (grent->gr_name != NULL && grent->gr_name[0] != '\0') {
- bsdtar->gname_lookup[slot].gname = strdup(grent->gr_name);
- bsdtar->gname_lookup[slot].gid = gid;
+ cache->cache[slot].name = strdup(grent->gr_name);
+ cache->cache[slot].id = gid;
}
return (NULL);
}
@@ -1002,10 +1165,11 @@ new_enough(struct bsdtar *bsdtar, const char *path,
if (path[0] == '.' && path[1] == '/' && path[2] != '\0')
path += 2;
- if (bsdtar->archive_dir_head == NULL)
+ if (bsdtar->archive_dir == NULL ||
+ bsdtar->archive_dir->head == NULL)
return (1);
- for (p = bsdtar->archive_dir_head; p != NULL; p = p->next) {
+ for (p = bsdtar->archive_dir->head; p != NULL; p = p->next) {
if (strcmp(path, p->name)==0)
return (p->mtime_sec < mtime_sec ||
(p->mtime_sec == mtime_sec &&
@@ -1028,7 +1192,11 @@ add_dir_list(struct bsdtar *bsdtar, const char *path,
if (path[0] == '.' && path[1] == '/' && path[2] != '\0')
path += 2;
- p = bsdtar->archive_dir_head;
+ /*
+ * Search entire list to see if this file has appeared before.
+ * If it has, override the timestamp data.
+ */
+ p = bsdtar->archive_dir->head;
while (p != NULL) {
if (strcmp(path, p->name)==0) {
p->mtime_sec = mtime_sec;
@@ -1043,11 +1211,11 @@ add_dir_list(struct bsdtar *bsdtar, const char *path,
p->mtime_sec = mtime_sec;
p->mtime_nsec = mtime_nsec;
p->next = NULL;
- if (bsdtar->archive_dir_tail == NULL) {
- bsdtar->archive_dir_head = bsdtar->archive_dir_tail = p;
+ if (bsdtar->archive_dir->tail == NULL) {
+ bsdtar->archive_dir->head = bsdtar->archive_dir->tail = p;
} else {
- bsdtar->archive_dir_tail->next = p;
- bsdtar->archive_dir_tail = p;
+ bsdtar->archive_dir->tail->next = p;
+ bsdtar->archive_dir->tail = p;
}
}
OpenPOWER on IntegriCloud