summaryrefslogtreecommitdiffstats
path: root/mt-work
diff options
context:
space:
mode:
Diffstat (limited to 'mt-work')
-rw-r--r--mt-work/email.sh6
-rw-r--r--mt-work/mplayer.diff13
-rw-r--r--mt-work/raw.sh10
-rw-r--r--mt-work/test.sh13
-rw-r--r--mt-work/todo.txt95
-rw-r--r--mt-work/valgrind-check.sh5
-rw-r--r--mt-work/yuvcmp.c182
7 files changed, 324 insertions, 0 deletions
diff --git a/mt-work/email.sh b/mt-work/email.sh
new file mode 100644
index 0000000..e5cdb72
--- /dev/null
+++ b/mt-work/email.sh
@@ -0,0 +1,6 @@
+#!/bin/sh -v
+
+# args: $1 = directory to write patches to, $2 = smtp server, $3 = destination address
+# Formats every commit not in 'origin' as a patch, then hands them to git send-email with --dry-run.
+git format-patch -o "$1" --inline --subject-prefix=soc --thread origin
+git send-email --no-chain-reply-to --smtp-server "$2" --to "$3" --dry-run "$1"
diff --git a/mt-work/mplayer.diff b/mt-work/mplayer.diff
new file mode 100644
index 0000000..ef38063
--- /dev/null
+++ b/mt-work/mplayer.diff
@@ -0,0 +1,13 @@
+diff --git a/libmpcodecs/vd_ffmpeg.c b/libmpcodecs/vd_ffmpeg.c
+index 7c68a20..135e6b1 100644
+--- a/libmpcodecs/vd_ffmpeg.c
++++ b/libmpcodecs/vd_ffmpeg.c
+@@ -280,7 +280,7 @@ static int init(sh_video_t *sh){
+ return 0;
+ }
+
+- if(vd_use_slices && (lavc_codec->capabilities&CODEC_CAP_DRAW_HORIZ_BAND) && !do_vis_debug)
++ if(vd_use_slices && (lavc_codec->capabilities&CODEC_CAP_DRAW_HORIZ_BAND) && !do_vis_debug && lavc_param_threads <= 1)
+ ctx->do_slices=1;
+
+ if(lavc_codec->capabilities&CODEC_CAP_DR1 && !do_vis_debug && lavc_codec->id != CODEC_ID_H264 && lavc_codec->id != CODEC_ID_INTERPLAY_VIDEO && lavc_codec->id != CODEC_ID_ROQ && lavc_codec->id != CODEC_ID_VP8 && lavc_codec->id != CODEC_ID_LAGARITH)
diff --git a/mt-work/raw.sh b/mt-work/raw.sh
new file mode 100644
index 0000000..0ced88e
--- /dev/null
+++ b/mt-work/raw.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+fn=`basename "$1"`
+for th in 1 4; do
+ time ./ffmpeg_g -threads $th -skip_loop_filter all -vsync 0 -y -t 30 -i "$1" -an -f rawvideo "raw/n-$fn-$th.yuv"
+done
+
+#for th in 1 4; do
+# time ./ffmpeg_g -threads $th -vsync 0 -y -t 30 -i "$1" -an -f rawvideo "raw/$fn-$th.yuv"
+#done
diff --git a/mt-work/test.sh b/mt-work/test.sh
new file mode 100644
index 0000000..a88a35b
--- /dev/null
+++ b/mt-work/test.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Write framecrc output for the input given as $1 at several thread counts,
+# then print MD5 sums of the result files so they can be compared.
+input_name=$(basename "$1")
+
+# Runs with -vsync 0: 30 seconds, 1 to 4 threads, each one timed.
+for threads in 1 2 3 4
+do
+    time ./ffmpeg_g -threads "$threads" -flags2 +fast -vsync 0 -y -t 30 -i "$1" -an -f framecrc "crc/${input_name}-${threads}.txt" >/dev/null 2>&1
+done
+
+# Runs without -vsync 0: 10 seconds, 1 and 3 threads.
+for threads in 1 3
+do
+    ./ffmpeg_g -threads "$threads" -y -t 10 -i "$1" -an -f framecrc "crc/${input_name}-${threads}-vsync.txt" >/dev/null 2>&1
+done
+
+md5 "crc/${input_name}-"[1234].txt
+echo
+md5 "crc/${input_name}-"*vsync.txt
diff --git a/mt-work/todo.txt b/mt-work/todo.txt
new file mode 100644
index 0000000..f64514f
--- /dev/null
+++ b/mt-work/todo.txt
@@ -0,0 +1,95 @@
+Todo
+
+-- For other people
+- Multithread vc1.
+- Multithread an intra codec like mjpeg (trivial).
+- Fix mpeg1 (see below).
+- Try the first three items under Optimization.
+- Fix h264 (see below).
+- Try mpeg4 (see below).
+
+-- Bug fixes
+
+General critical:
+- Error resilience has to run before ff_report_frame_progress()
+is called. Otherwise there will be race conditions. (This might already
+work.) In general, error paths should be tested more.
+- 'make fate THREADS=2' doesn't pass. Most failures are due to
+bugs in vsync in ffmpeg.c, which are currently obscuring real failures.
+
+h264:
+- Files that aren't parsed (e.g. mp4) and contain PAFF with two
+field pictures in the same packet are not optimal. Modify the
+nals_needed check so that the second field's first slice is
+considered as needed, then uncomment the FIXME code in decode_postinit.
+Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4
+
+mpeg4:
+- Packed B-frames need to be explicitly split up
+when frame threading is on. It's not very fast
+without this.
+- The buffer age optimization is disabled due to
+the way buffers are allocated across threads. The
+branch 'fix_buffer_age' has an attempt to fix it
+which breaks ffplay.
+- Support interlaced.
+
+mpeg1/2:
+- Seeking always prints "first frame not a keyframe"
+with threads on. Currently disabled for this reason.
+
+-- Prove correct
+
+- decode_update_progress() in h264.c
+race_checking branch has some work on h264,
+but not that function. It might be worth putting
+the branch under #ifdef DEBUG in mainline, but
+the code would have to be cleaner.
+- MPV_lowest_referenced_row() and co in mpegvideo.c
+- Same in vp3.
+
+-- Optimization
+
+- Merge h264 decode_update_progress() with loop_filter().
+Add CODEC_CAP_DRAW_HORIZ_BAND as a side effect.
+- EMU_EDGE is always set for h264 PAFF+MT
+because draw_edges() writes into the other field's
+thread's pixels. Needs an option to skip T/B fields.
+- Check update_thread_context() functions and make
+sure they only copy what they need to.
+- Try some more optimization of the "ref < 48; ref++"
+loop in h264.c await_references(), try turning the list0/list1 check
+above into a loop without being slower.
+- Support frame+slice threading at the same time
+by assigning slice_count threads for frame threads
+to use with execute(). This is simpler but unbalanced
+if only one frame thread uses any.
+
+-- Features
+
+- Support streams with width/height changing. This
+requires flushing all current frames (and buffering
+the input in the meantime), closing the codec and
+reopening it. Or don't support it.
+- Support encoding. Might need more threading primitives
+for good ratecontrol; would be nice for audio and libavfilter too.
+- Async decoding part 1: instead of trying to
+start every thread at the beginning, return a picture
+if the earliest thread is already done, but don't wait
+for it. Not sure what effect this would have.
+- Part 2: have an API that doesn't wait for the decoding
+thread, only returns EAGAIN if it's not ready. What will
+it do with the next input packet if it returns that?
+- Have an API that returns finished pictures but doesn't
+require sending new ones. Maybe allow NULL avpkt when
+not at the end of the stream.
+
+-- Samples
+
+http://astrange.ithinksw.net/ffmpeg/mt-samples/
+
+See yuvcmp.c in this directory to compare decoded samples.
+
+For debugging, try commenting out ff_thread_finish_setup calls so
+that only one thread runs at once, and then use binary search and
+scattered printfs to look for differences in codec contexts.
diff --git a/mt-work/valgrind-check.sh b/mt-work/valgrind-check.sh
new file mode 100644
index 0000000..276327a
--- /dev/null
+++ b/mt-work/valgrind-check.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Run the decoder on the input given as $1 under valgrind, first with one
+# thread and then with three, discarding the decoded output.
+for threads in 1 3
+do
+    valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads "$threads" -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null
+done
diff --git a/mt-work/yuvcmp.c b/mt-work/yuvcmp.c
new file mode 100644
index 0000000..11585f9
--- /dev/null
+++ b/mt-work/yuvcmp.c
@@ -0,0 +1,182 @@
+/*
+ * originally by Andreas Öman (andoma)
+ * some changes by Alexander Strange
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+
+int
+main(int argc, char **argv)
+{
+ int fd[2];
+ int print_pixels = 0;
+ int dump_blocks = 0;
+
+ int width;
+ int height;
+ int to_skip = 0;
+
+ if (argc < 6) {
+ fprintf(stderr, "%s [YUV file 1] [YUV file 2] width height pixelcmp|blockdump (# to skip)\n", argv[0]);
+ return 1;
+ }
+
+ width = atoi(argv[3]);
+ height = atoi(argv[4]);
+ if (argc > 6)
+ to_skip = atoi(argv[6]);
+
+ uint8_t *Y[2], *C[2][2];
+ int i, v, c, p;
+ int lsiz = width * height;
+ int csiz = width * height / 4;
+ int x, y;
+ int cwidth = width / 2;
+ int fr = to_skip;
+ int mb;
+ char *mberrors;
+ int mb_x, mb_y;
+ uint8_t *a;
+ uint8_t *b;
+ int die = 0;
+
+ print_pixels = strstr(argv[5], "pixelcmp") ? 1 : 0;
+ dump_blocks = strstr(argv[5], "blockdump") ? 1 : 0;
+
+ for(i = 0; i < 2; i++) {
+ Y[i] = malloc(lsiz);
+ C[0][i] = malloc(csiz);
+ C[1][i] = malloc(csiz);
+
+ fd[i] = open(argv[1 + i], O_RDONLY);
+ if(fd[i] == -1) {
+ perror("open");
+ exit(1);
+ }
+ fcntl(fd[i], F_NOCACHE, 1);
+
+ if (to_skip)
+ lseek(fd[i], to_skip * (lsiz + 2*csiz), SEEK_SET);
+ }
+
+ mb_x = width / 16;
+ mb_y = height / 16;
+
+ mberrors = malloc(mb_x * mb_y);
+
+ while(!die) {
+ memset(mberrors, 0, mb_x * mb_y);
+
+ printf("Loading frame %d\n", ++fr);
+
+ for(i = 0; i < 2; i++) {
+ v = read(fd[i], Y[i], lsiz);
+ if(v != lsiz) {
+ fprintf(stderr, "Unable to read Y from file %d, exiting\n", i + 1);
+ return 1;
+ }
+ }
+
+
+ for(c = 0; c < lsiz; c++) {
+ if(Y[0][c] != Y[1][c]) {
+ x = c % width;
+ y = c / width;
+
+ mb = x / 16 + (y / 16) * mb_x;
+
+ if(print_pixels)
+ printf("Luma diff 0x%02x != 0x%02x at pixel (%4d,%-4d) mb(%d,%d) #%d\n",
+ Y[0][c],
+ Y[1][c],
+ x, y,
+ x / 16,
+ y / 16,
+ mb);
+
+ mberrors[mb] |= 1;
+ }
+ }
+
+ /* Chroma planes */
+
+ for(p = 0; p < 2; p++) {
+
+ for(i = 0; i < 2; i++) {
+ v = read(fd[i], C[p][i], csiz);
+ if(v != csiz) {
+ fprintf(stderr, "Unable to read %c from file %d, exiting\n",
+ "UV"[p], i + 1);
+ return 1;
+ }
+ }
+
+ for(c = 0; c < csiz; c++) {
+ if(C[p][0][c] != C[p][1][c]) {
+ x = c % cwidth;
+ y = c / cwidth;
+
+ mb = x / 8 + (y / 8) * mb_x;
+
+ mberrors[mb] |= 2 << p;
+
+ if(print_pixels)
+
+ printf("c%c diff 0x%02x != 0x%02x at pixel (%4d,%-4d) "
+ "mb(%3d,%-3d) #%d\n",
+ p ? 'r' : 'b',
+ C[p][0][c],
+ C[p][1][c],
+
+ x, y,
+ x / 8,
+ y / 8,
+ x / 8 + y / 8 * cwidth / 8);
+ }
+ }
+ }
+
+ for(i = 0; i < mb_x * mb_y; i++) {
+ x = i % mb_x;
+ y = i / mb_x;
+
+ if(mberrors[i]) {
+ die = 1;
+
+ printf("MB (%3d,%-3d) %4d %d %c%c%c damaged\n",
+ x, y, i, mberrors[i],
+ mberrors[i] & 1 ? 'Y' : ' ',
+ mberrors[i] & 2 ? 'U' : ' ',
+ mberrors[i] & 4 ? 'V' : ' ');
+
+ if(dump_blocks) {
+ a = Y[0] + x * 16 + y * 16 * width;
+ b = Y[1] + x * 16 + y * 16 * width;
+
+ for(y = 0; y < 16; y++) {
+ printf("%c ", "TB"[y&1]);
+ for(x = 0; x < 16; x++)
+ printf("%02x%c", a[x + y * width],
+ a[x + y * width] != b[x + y * width] ? '<' : ' ');
+
+ printf("| ");
+ for(x = 0; x < 16; x++)
+ printf("%02x%c", b[x + y * width],
+ a[x + y * width] != b[x + y * width] ? '<' : ' ');
+
+ printf("\n");
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
OpenPOWER on IntegriCloud