summary | refs | log | tree | commit | diff | stats
path: root/usr.bin/sort
diff options
context:
space:
mode:
author: gabor <gabor@FreeBSD.org> 2012-05-25 09:30:16 +0000
committer: gabor <gabor@FreeBSD.org> 2012-05-25 09:30:16 +0000
commit: 02feec4a4507f678cfd5036cbb906dcc9908b6ba (patch)
tree: e9eb2b40eabd4689dc4eb5b909ee80cfe21a2b0d /usr.bin/sort
parent: 95907ed567f199388c4cd044d314ce8dde553626 (diff)
download: FreeBSD-src-02feec4a4507f678cfd5036cbb906dcc9908b6ba.zip
download: FreeBSD-src-02feec4a4507f678cfd5036cbb906dcc9908b6ba.tar.gz
- Only use multi-threading for large files
- Do not use mmap() by default; it can be enabled by --mmap
- Add some minor optimizations for -u
- Update manual page according to the changes

Submitted by: Oleg Moskalenko <oleg.moskalenko@citrix.com>
Diffstat (limited to 'usr.bin/sort')
-rw-r--r-- usr.bin/sort/bwstring.c 16
-rw-r--r-- usr.bin/sort/file.c 48
-rw-r--r-- usr.bin/sort/file.h 3
-rw-r--r-- usr.bin/sort/radixsort.c 17
-rw-r--r-- usr.bin/sort/sort.1.in 5
-rw-r--r-- usr.bin/sort/sort.c 23
-rw-r--r-- usr.bin/sort/sort.h 1
7 files changed, 90 insertions, 23 deletions
diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c
index 9ebeb10..5733732 100644
--- a/usr.bin/sort/bwstring.c
+++ b/usr.bin/sort/bwstring.c
@@ -499,6 +499,22 @@ bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
}
return (bwssbdup(ret, *len));
+ } else if (!zero_ended && (MB_CUR_MAX == 1)) {
+ char *ret;
+
+ ret = fgetln(f, len);
+
+ if (ret == NULL) {
+ if (!feof(f))
+ err(2, NULL);
+ return (NULL);
+ }
+ if (*len > 0) {
+ if (ret[*len - 1] == '\n')
+ --(*len);
+ }
+ return (bwscsbdup(ret, *len));
+
} else {
wchar_t c = 0;
diff --git a/usr.bin/sort/file.c b/usr.bin/sort/file.c
index 9afa4b2..68c3f54 100644
--- a/usr.bin/sort/file.c
+++ b/usr.bin/sort/file.c
@@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$");
unsigned long long free_memory = 1000000;
unsigned long long available_free_memory = 1000000;
+bool use_mmap;
+
const char *tmpdir = "/var/tmp";
const char *compress_program;
@@ -404,23 +406,21 @@ sort_list_dump(struct sort_list *l, const char *fn)
err(2, NULL);
if (l->list) {
- struct sort_list_item *last_printed_item;
size_t i;
-
- last_printed_item = NULL;
-
- for (i = 0; i < l->count; i++) {
- struct sort_list_item *item;
-
- item = l->list[i];
-
- if (!(sort_opts_vals.uflag) ||
- (last_printed_item == NULL) ||
- list_coll(&last_printed_item, &item)) {
- bwsfwrite(item->str, f,
+ if (!(sort_opts_vals.uflag)) {
+ for (i = 0; i < l->count; ++i)
+ bwsfwrite(l->list[i]->str, f,
sort_opts_vals.zflag);
- if (sort_opts_vals.uflag)
+ } else {
+ struct sort_list_item *last_printed_item = NULL;
+ struct sort_list_item *item;
+ for (i = 0; i < l->count; ++i) {
+ item = l->list[i];
+ if ((last_printed_item == NULL) ||
+ list_coll(&last_printed_item, &item)) {
+ bwsfwrite(item->str, f, sort_opts_vals.zflag);
last_printed_item = item;
+ }
}
}
}
@@ -657,7 +657,7 @@ file_reader_init(const char *fsrc)
ret->fname = sort_strdup(fsrc);
- if (strcmp(fsrc, "-") && (compress_program == NULL)) {
+ if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
do {
struct stat stat_buf;
@@ -1539,7 +1539,9 @@ mt_sort(struct sort_list *list,
const char* fn)
{
#if defined(SORT_THREADS)
- if (nthreads < 2 || list->count < nthreads) {
+ if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
+ size_t nthreads_save = nthreads;
+ nthreads = 1;
#endif
/* if single thread or small data, do simple sort */
sort_func(list->list, list->count,
@@ -1547,6 +1549,7 @@ mt_sort(struct sort_list *list,
(int(*)(const void *, const void *)) list_coll);
sort_list_dump(list, fn);
#if defined(SORT_THREADS)
+ nthreads = nthreads_save;
} else {
/* multi-threaded sort */
struct sort_list **parts;
@@ -1590,7 +1593,18 @@ mt_sort(struct sort_list *list,
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
- pthread_create(&pth, &attr, mt_sort_thread, parts[i]);
+ for (;;) {
+ int res = pthread_create(&pth, &attr,
+ mt_sort_thread, parts[i]);
+
+ if (res >= 0)
+ break;
+ if (errno == EAGAIN) {
+ pthread_yield();
+ continue;
+ }
+ err(2, NULL);
+ }
pthread_attr_destroy(&attr);
}
diff --git a/usr.bin/sort/file.h b/usr.bin/sort/file.h
index fd23922..47c2299 100644
--- a/usr.bin/sort/file.h
+++ b/usr.bin/sort/file.h
@@ -84,6 +84,9 @@ struct file0_reader
extern unsigned long long free_memory;
extern unsigned long long available_free_memory;
+/* Are we using mmap ? */
+extern bool use_mmap;
+
/* temporary file dir */
extern const char *tmpdir;
diff --git a/usr.bin/sort/radixsort.c b/usr.bin/sort/radixsort.c
index 6fa2d3a..ccaa994 100644
--- a/usr.bin/sort/radixsort.c
+++ b/usr.bin/sort/radixsort.c
@@ -609,7 +609,17 @@ run_top_sort_level(struct sort_level *sl)
pthread_attr_setdetachstate(&attr,
PTHREAD_DETACHED);
- pthread_create(&pth, &attr, sort_thread, NULL);
+ for (;;) {
+ int res = pthread_create(&pth, &attr,
+ sort_thread, NULL);
+ if (res >= 0)
+ break;
+ if (errno == EAGAIN) {
+ pthread_yield();
+ continue;
+ }
+ err(2, NULL);
+ }
pthread_attr_destroy(&attr);
}
@@ -626,6 +636,10 @@ run_sort(struct sort_list_item **base, size_t nmemb)
struct sort_level *sl;
#if defined(SORT_THREADS)
+ size_t nthreads_save = nthreads;
+ if (nmemb < MT_SORT_THRESHOLD)
+ nthreads = 1;
+
if (nthreads > 1) {
pthread_mutexattr_t mattr;
@@ -663,6 +677,7 @@ run_sort(struct sort_list_item **base, size_t nmemb)
pthread_mutex_destroy(&g_ls_mutex);
pthread_mutex_destroy(&sort_left_mutex);
}
+ nthreads = nthreads_save;
#endif
}
diff --git a/usr.bin/sort/sort.1.in b/usr.bin/sort/sort.1.in
index e24f353..12f63ec 100644
--- a/usr.bin/sort/sort.1.in
+++ b/usr.bin/sort/sort.1.in
@@ -33,7 +33,7 @@
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd May 6, 2012
+.Dd May 25, 2012
.Dt SORT 1
.Os
.Sh NAME
@@ -358,6 +358,9 @@ This sort algorithm cannot be used with
.Fl u
and
.Fl s .
+.It Fl Fl mmap
+Try to use file memory mapping system call.
+It may increase speed in some cases.
.El
.Pp
The following operands are available:
diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c
index f802ced..dd0ed68 100644
--- a/usr.bin/sort/sort.c
+++ b/usr.bin/sort/sort.c
@@ -89,6 +89,7 @@ const char *nlsstr[] = { "",
"[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
"[-o outfile] [--batch-size size] [--files0-from file] "
"[--heapsort] [--mergesort] [--radixsort] [--qsort] "
+ "[--mmap] "
#if defined(SORT_THREADS)
"[--nthreads thread_no] "
#endif
@@ -138,7 +139,8 @@ enum
QSORT_OPT,
MERGESORT_OPT,
HEAPSORT_OPT,
- RADIXSORT_OPT
+ RADIXSORT_OPT,
+ MMAP_OPT
};
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
@@ -164,6 +166,7 @@ struct option long_options[] = {
{ "key", required_argument, NULL, 'k' },
{ "merge", no_argument, NULL, 'm' },
{ "mergesort", no_argument, NULL, MERGESORT_OPT },
+ { "mmap", no_argument, NULL, MMAP_OPT },
{ "month-sort", no_argument, NULL, 'M' },
{ "numeric-sort", no_argument, NULL, 'n' },
{ "output", required_argument, NULL, 'o' },
@@ -1063,12 +1066,16 @@ main(int argc, char **argv)
tmpdir = sort_strdup(optarg);
break;
case 't':
- if (strlen(optarg) > 1) {
- if (strcmp(optarg, "\\0")) {
+ while (strlen(optarg) > 1) {
+ if (optarg[0] != '\\') {
errx(2, "%s: %s\n",
strerror(EINVAL), optarg);
}
- *optarg = 0;
+ optarg += 1;
+ if (*optarg == '0') {
+ *optarg = 0;
+ break;
+ }
}
sort_opts_vals.tflag = true;
sort_opts_vals.field_sep = btowc(optarg[0]);
@@ -1126,6 +1133,9 @@ main(int argc, char **argv)
case MERGESORT_OPT:
sort_opts_vals.sort_method = SORT_MERGESORT;
break;
+ case MMAP_OPT:
+ use_mmap = true;
+ break;
case HEAPSORT_OPT:
sort_opts_vals.sort_method = SORT_HEAPSORT;
break;
@@ -1258,6 +1268,11 @@ main(int argc, char **argv)
}
}
+#if defined(SORT_THREADS)
+ if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
+ nthreads = 1;
+#endif
+
if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
struct file_list fl;
struct sort_list list;
diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h
index 9d4d1ed..f6505c9 100644
--- a/usr.bin/sort/sort.h
+++ b/usr.bin/sort/sort.h
@@ -55,6 +55,7 @@ extern nl_catd catalog;
extern const char *nlsstr[];
#if defined(SORT_THREADS)
+#define MT_SORT_THRESHOLD (10000)
extern size_t ncpu;
extern size_t nthreads;
#endif
OpenPOWER on IntegriCloud