diff -ru rte-orig/mp1e/common/mmx.c rte/mp1e/common/mmx.c --- rte-orig/mp1e/common/mmx.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/common/mmx.c 2005-12-11 17:52:22.000000000 +0100 @@ -153,12 +153,14 @@ if (FEATURE(AMD_MMX | AMD_3DNOW)) return CPU_K6_2; } - } else if (!strncmp(c.s + 4, "CyrixInstead", 12)) { + } else if ((!strncmp(c.s + 4, "CyrixInstead", 12)) || (!strncmp(c.s + 4, "CentaurHauls", 12))) { if (cpuid(&c, 0x80000000) > 0x80000000) { cpuid(&c, 0x80000001); if (FEATURE(CYRIX_MMX | CYRIX_MMXEXT | CYRIX_3DNOW)) return CPU_CYRIX_III; + if (FEATURE(CYRIX_MMX | CYRIX_3DNOW)) + return CPU_VIA_C3; } else { cpuid(&c, 1); diff -ru rte-orig/mp1e/common/mmx.h rte/mp1e/common/mmx.h --- rte-orig/mp1e/common/mmx.h 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/common/mmx.h 2005-12-11 17:52:22.000000000 +0100 @@ -56,6 +56,7 @@ #define CPU_ATHLON 6 /* MMX, 3DNOW, AMD 3DNOW ext, CMOV, SSE int; K7 core */ #define CPU_CYRIX_MII 7 /* MMX, CMOV */ #define CPU_CYRIX_III 8 /* MMX, Cyrix MMX ext, 3DNOW, CMOV */ +#define CPU_VIA_C3 9 /* MMX, 3DNOW */ extern int cpu_detection(void); diff -ru rte-orig/mp1e/devices/oss.c rte/mp1e/devices/oss.c --- rte-orig/mp1e/devices/oss.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/devices/oss.c 2005-12-11 17:52:22.000000000 +0100 @@ -60,6 +60,58 @@ struct tfmem tfmem; }; + +static double max = 0.0, sum = 0.0; +static long samples = 0L; +static int persec, firsttime = 1; + +static void volume(unsigned char *data, ssize_t used) +{ /* Track the peak amplitude of the 16-bit samples in data[0..used) in the file-level max; once persec samples were seen the peak is reported and, if it is below 0.01, the encoder exits. */ + register short int *p = (short int *)data; + register int i = used >> 1; + auto double samp; +#if 0 + auto double rms; +#endif + auto long lsamp; + + samples += i; + + while (i > 0) { + lsamp = (long)*p * 65536L; /* scale to 32 bits; multiplying avoids the signed-int overflow of (x & 0xffff) << 16 */ + samp = (double)lsamp / 0x7fffffffL; + +#if 0 + sum += samp * samp; +#endif + + if (max < samp || max < -samp) /* negative half-waves count towards the peak as well */ + max = (samp < 0.0) ? -samp : samp; + + p++; + i--; + } + + if (firsttime && (samples >= persec)) { // after first second ... 
+ firsttime = 0; + + printv(1, "\nAmplitude max=%.6f\n", max); + + if (max < 0.01) { // silence! + printv(1, "Silence only detected - stopping encder\n"); + exit(1); + } + } + +#if 0 + rms = sqrt(sum / samples); + + if (rms > 0.1) + printv(1, "\nWERBUNG?\n"); +#endif +} + + static void wait_full(fifo *f) { @@ -70,6 +122,7 @@ unsigned char *p; ssize_t r, n; double now; + extern int halt; assert(b->data == NULL); /* no queue */ @@ -145,7 +198,7 @@ /* data lost, out of sync; XXX 1.98 bad */ oss->time = now; - printv(0, "audio dropped, dt=%f\n", dt); + printv(1, "audio dropped, dt=%f\n", dt); } else { oss->time += mp1e_timestamp_filter (&oss->tfmem, dt, 0.05, 1e-7, 0.08); @@ -166,6 +219,9 @@ ASSERT("write raw audio data", write(oss->fd2, b->data, b->used) == b->used); + if (halt && firsttime) + volume(b->data, b->used); + send_full_buffer(&oss->pcm.producer, b); } @@ -205,6 +261,11 @@ oss->pcm.sampling_rate = sampling_rate; oss->pcm.stereo = stereo; + if (stereo) + persec = sampling_rate << 1; + else + persec = sampling_rate; + if (test_mode & 64) ASSERT("open raw audio file", (oss->fd2 = open("raw-audio", O_WRONLY | O_CREAT, 0666)) != -1); diff -ru rte-orig/mp1e/devices/v4l25.c rte/mp1e/devices/v4l25.c --- rte-orig/mp1e/devices/v4l25.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/devices/v4l25.c 2005-12-11 17:52:22.000000000 +0100 @@ -154,6 +154,7 @@ vbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; vbuf.memory = V4L2_MEMORY_MMAP; vbuf.index = b - buffers; + vbuf.memory = V4L2_MEMORY_MMAP; ASSERT("enqueue capture buffer", 0 == _ioctl (fd, VIDIOC_QBUF, &vbuf)); @@ -191,6 +192,7 @@ int hmod, vmod, i, width1, height1; v4l2_std_id std; struct v4l2_standard standard; + struct v4l2_input chan; if (verbose >= 3) log_fp = stderr; @@ -213,7 +215,15 @@ FAIL("%s ('%s') does not support streaming i/o.", cap_dev, vcap.card); - printv(2, "Opened V4L2 (new) %s ('%s')\n", cap_dev, vcap.card); + printv(2, "Opened %s V4L25('%s')\n", cap_dev, vcap.card); + + /* Set a/v input source */ + + 
ASSERT("set video channel", ioctl(fd, VIDIOC_S_INPUT, &source) == 0); + memset(&chan, 0, sizeof(chan)); + chan.index = source; + ASSERT("query video channel", ioctl(fd, VIDIOC_ENUMINPUT, &chan) == 0); + printv(2, "Setting input channel %d:%s.\n", source, chan.name); ASSERT("query current video standard", 0 == _ioctl (fd, VIDIOC_G_STD, &std)); @@ -441,6 +451,7 @@ vrbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; vrbuf.memory = V4L2_MEMORY_MMAP; vrbuf.count = MAX(cap_buffers, min_cap_buffers); + vrbuf.memory = V4L2_MEMORY_MMAP; ASSERT("request capture buffers", 0 == _ioctl (fd, VIDIOC_REQBUFS, &vrbuf)); Nur in rte/mp1e/devices: v4l25.c.orig. diff -ru rte-orig/mp1e/devices/v4l2.c rte/mp1e/devices/v4l2.c --- rte-orig/mp1e/devices/v4l2.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/devices/v4l2.c 2005-12-11 17:52:22.000000000 +0100 @@ -353,6 +353,7 @@ unsigned int probed_modes = 0; int min_cap_buffers = video_look_ahead(gop_sequence); int hmod, vmod, i, width1, height1; + struct video_channel chan; ASSERT("open video capture device", (fd = open(cap_dev, O_RDWR)) != -1); @@ -372,9 +373,17 @@ "%s will not work with the v4l2 read(2) interface.", cap_dev, vcap.name, program_invocation_short_name); - printv(2, "Opened %s ('%s')\n", cap_dev, vcap.name); + printv(2, "Opened %s V4L2('%s')\n", cap_dev, vcap.name); + /* Set a/v input source */ + + memset(&chan, 0, sizeof(chan)); + chan.channel = source; + ASSERT("query video channel", ioctl(fd, VIDIOCGCHAN, &chan) == 0); + printv(2, "Setting input channel %d:%s.\n", source, chan.name); + ASSERT("set video channel", ioctl(fd, VIDIOCSCHAN, &chan) == 0); + ASSERT("query current video standard", IOCTL(fd, VIDIOC_G_STD, &vstd) == 0); diff -ru rte-orig/mp1e/devices/v4l.c rte/mp1e/devices/v4l.c --- rte-orig/mp1e/devices/v4l.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/devices/v4l.c 2005-12-11 17:52:22.000000000 +0100 @@ -41,6 +41,22 @@ #include "../options.h" #include "../video/video.h" +typedef struct { + int pfp_halfwidth; // 0 + 
int pfp_height; // 4 + + char *pfp_frame1; // 8 + char *pfp_frame2; // 12 + + char *pfp_dest_y; // 16 + char *pfp_dest_u; // 20 + char *pfp_dest_v; // 24 + +// mmx_t scratch[8]; +} __attribute__ ((aligned (64))) pre_filter_param; + +extern int pre_filter_1(pre_filter_param *, int reserved2, int reserved3) __attribute__ ((regparm (3))); + static int fd; static fifo cap_fifo; static producer cap_prod; @@ -50,6 +66,9 @@ static struct tfmem tfmem; static int use_mmap = 0; +static int use_mmap_ext = 0; +static double use_mmap_ext_jif = 0.01; // 100 hz jiffies default +static double use_mmap_ext_time0; static int gb_frame = 0; static struct video_mmap gb_buf; static struct video_mbuf gb_buffers; @@ -100,6 +119,37 @@ return cap_time; } +int pre_filter_2(pre_filter_param *f_par, int reserved2, int reserved3) +{ /* Averages bytes of the two source frames (pfp_frame1, pfp_frame2) into the three destination planes: each pfp_dest_y byte is the mean of four source bytes at even offsets (>>2); each pfp_dest_u / pfp_dest_v byte is the mean of eight bytes at offsets 1,5 / 3,7 taken from two rows of both frames (>>3). reserved2 and reserved3 are not used. NOTE(review): declared int but no return statement is visible, and no caller appears in this chunk (the capture thread calls pre_filter_1) -- presumably a plain-C reference for the MMX routine; confirm. */ +long line; +long pixel; +long half_width = f_par->pfp_halfwidth; +long pitch = f_par->pfp_halfwidth << 2; /* used below as the byte offset from one source row to the next (s1[pitch+n]) */ +unsigned char *t1,*t2,*t3; +unsigned char *s1, *s2; +t1=f_par->pfp_dest_y; +t2=f_par->pfp_dest_u; +t3=f_par->pfp_dest_v; +s1=f_par->pfp_frame1; +s2=f_par->pfp_frame2; +for(line=0;line>2; /* NOTE(review): this line is damaged. Going by the hunk's added-line count it appears to stand for three lines (outer for(line...), inner for(pixel...), and the t1[0]= assignment); the text between '<' and '>>2' was lost. Restore it from the upstream patch before applying. */ + t1[1]=((int)s1[4]+(int)s1[6]+(int)s2[4]+(int)s2[6])>>2; + t1[half_width+0]=((int)s1[pitch+0]+(int)s1[pitch+2]+(int)s2[pitch+0]+(int)s2[pitch+2])>>2; + t1[half_width+1]=((int)s1[pitch+4]+(int)s1[pitch+6]+(int)s2[pitch+4]+(int)s2[pitch+6])>>2; + t1+=2; + (*t3++)=((int)s1[3]+(int)s1[7]+(int)s1[pitch+3]+(int)s1[pitch+7]+(int)s2[3]+(int)s2[7]+(int)s2[pitch+3]+(int)s2[pitch+7])>>3; + (*t2++)=((int)s1[1]+(int)s1[5]+(int)s1[pitch+1]+(int)s1[pitch+5]+(int)s2[1]+(int)s2[5]+(int)s2[pitch+1]+(int)s2[pitch+5])>>3; + s1+=8; + s2+=8; + } + s1+=pitch; /* skip the second source row already consumed via s1[pitch+n] / s2[pitch+n] */ + s2+=pitch; + t1+=half_width; /* skip the second output row already written via t1[half_width+n] */ + } +} + static void * v4l_cap_thread(void *unused) { @@ -113,17 +163,60 @@ if (use_mmap) { int num_frame2 = (gb_frame+1) % gb_buffers.frames; + long tmp_frame1; + pre_filter_param f_param; + int actual_VIDIOCSYNC=use_mmap_ext?201:VIDIOCSYNC; + static double time_fix = 0.0; + static long last_stamp; /* mw: 
rationally EAGAIN should be returned, instead we get EINVAL, grrr */ - if (IOCTL(fd, VIDIOCSYNC, &gb_frame) < 0) + tmp_frame1 = gb_frame; + if (IOCTL(fd, actual_VIDIOCSYNC, &tmp_frame1) < 0) ASSERT("VIDIOCSYNC", errno == EAGAIN || errno == EINVAL); - if (fix_interlaced) { + if ((fix_interlaced==1)||(fix_interlaced==3)) { if (IOCTL(fd, VIDIOCSYNC, &num_frame2) < 0) ASSERT("VIDIOCSYNC", errno == EAGAIN || errno == EINVAL); } + if (use_mmap_ext) { + if (time_fix > 0) { + double tmp_inc = ((tmp_frame1 - last_stamp) & 0xFFFF)*use_mmap_ext_jif; + printv(3, "[DEBUG]: tmp_inc=%f\n", tmp_inc); + use_mmap_ext_time0 += tmp_inc; + tmp_inc = use_mmap_ext_time0 - time_fix; + if (tmp_inc < (-2.0)) { // fixme. Need something better :/ + printv(2, "Warning: Excessive video frames detected which is not be properly handled yet!\n"); + use_mmap_ext_time0 -= tmp_inc; + } + if (tmp_inc < tfmem.ref) { // fixme. Need something better :/ + //printv(2, "DEBUG Warning: tmp_inc=%f < %f\n", tmp_inc, tfmem.ref); + tmp_inc = tfmem.ref; + } + if (tmp_inc > 6*tfmem.ref) { + printv(2, "DEBUG Warning: tmp_inc=%f : %ld %d %d)\n", tmp_inc, last_stamp, gb_frame, num_frame2); + tmp_inc = 6*tfmem.ref; + } + time_fix += tmp_inc; + } else + time_fix = use_mmap_ext_time0; + last_stamp = tmp_frame1; + b->time = time_fix; + } + else b->time = timestamp2(b); + if (fix_interlaced==3) { + f_param.pfp_halfwidth = gb_buf.width >> 1; + f_param.pfp_height = gb_buf.height; + f_param.pfp_frame1 = video_buf+gb_buffers.offsets[gb_frame]; + f_param.pfp_frame2 = video_buf+gb_buffers.offsets[num_frame2]; + f_param.pfp_dest_y = b->data; + f_param.pfp_dest_u = f_param.pfp_dest_y + (f_param.pfp_halfwidth*f_param.pfp_height); + f_param.pfp_dest_v = f_param.pfp_dest_u + ((f_param.pfp_halfwidth*f_param.pfp_height) >> 2); + pre_filter_1(&f_param, 0, 0); + asm volatile ("emms;\n"); + } + else { /* NB: typ. 3-20 MB/s, a horrible waste of cycles. 
*/ if (filter_mode != CM_YVU) memcpy(b->data, video_buf @@ -138,6 +231,7 @@ bytes>>2); memcpy(b->data+(int)(bytes*1.25), p+bytes, bytes>>2); } + } b->used = b->size; @@ -145,7 +239,7 @@ ASSERT("VIDIOCMCAPTURE", IOCTL(fd, VIDIOCMCAPTURE, &gb_buf) >= 0); gb_frame = (gb_frame+1) % gb_buffers.frames; - if (fix_interlaced) { + if ((fix_interlaced==1)||(fix_interlaced==3)) { gb_buf.frame = (gb_buf.frame+1) % gb_buffers.frames; ASSERT("VIDIOCMCAPTURE", IOCTL(fd, VIDIOCMCAPTURE, &gb_buf) >= 0); @@ -176,7 +270,7 @@ n -= r; } - if (fix_interlaced) { + if (fix_interlaced==1) { n = b->size; p = b->data; while (n > 0) { @@ -253,8 +347,18 @@ FAIL("%s ('%s') is not a video capture device", cap_dev, vcap.name); - printv(2, "Opened %s ('%s')\n", cap_dev, vcap.name); + printv(2, "Opened %s V4L('%s')\n", cap_dev, vcap.name); + + /* Set a/v input source */ + + CLEAR(&vchan); + vchan.channel = source; + if (IOCTL(fd, VIDIOCGCHAN, &vchan) == 0) { + printv(2, "channel %s\n", vchan.name); + ASSERT("set video channel", IOCTL(fd, VIDIOCSCHAN, &vchan) == 0); + printv(2, "Setting input channel %d:%s.\n", source, vchan.name); + } /* Unmute audio (bttv) */ @@ -285,9 +389,11 @@ printv(2, "Apparently the device has no tuner\n"); CLEAR(&vchan); - vchan.channel = 0; /* first channel */ + vchan.channel = source; if (IOCTL(fd, VIDIOCGCHAN, &vchan) == 0) { + printv(2, "Setting input channel %d:%s.\n", source, vchan.name); + ASSERT("set video channel", IOCTL(fd, VIDIOCSCHAN, &vchan) == 0); vtuner.mode = vchan.norm; } else { printv(2, "Failed to query current video input of %s (VIDIOCGCHAN),\n" @@ -336,6 +442,18 @@ printv(2, "Source frame rate is %f Hz.\n", par->frame_rate); mp1e_timestamp_init(&tfmem, 1.0 / par->frame_rate); + { + int jif_hz=0; + if (IOCTL(fd, 203, &jif_hz) == 0) // FIXME! Very ugly hack for patched km. 
+ use_mmap_ext=1; + if (!jif_hz) { + jif_hz=100; + printv(0, "Warning: km is wrong version, please update it.\n"); + } + printv(2, "Assuming kernel jiffies are %d Hz.\n", jif_hz); + use_mmap_ext_jif=1.0/jif_hz; + } + par->width = saturate(par->width, 1, MAX_WIDTH); par->height = saturate(par->height, 1, MAX_HEIGHT); @@ -373,6 +491,11 @@ /* Capture setup */ + if (fix_interlaced==2) { + if (IOCTL(fd, 202, NULL) != 0) // FIXME! Very ugly hack for patched km. + FAIL("Fatal: rate control you requested seems not provided by the driver, sorry.\n"); + } + if (IOCTL(fd, VIDIOCGMBUF, &gb_buffers) == -1) { // FAIL("V4L read interface does not work, sorry.\n" // "Please send patches to zapping-misc@lists.sf.net.\n"); @@ -385,12 +508,15 @@ /* Set capture format and dimensions */ + if (fix_interlaced==3) + FAIL("Pre-filter #3 does not work for read interface, sorry.\n"); + CLEAR(&vpict); ASSERT("determine the current image format of %s (VIDIOCGPICT)", IOCTL(fd, VIDIOCGPICT, &vpict) == 0, cap_dev); - if (YUV420(filter_mode)) + if (YUV420(filter_mode)&&(fix_interlaced!=3)) vpict.palette = VIDEO_PALETTE_YUV420P; else vpict.palette = VIDEO_PALETTE_YUYV; @@ -421,8 +547,8 @@ } } - if (YUV420(filter_mode) && fix_interlaced) - FAIL("-K mode requires YUYV, sorry.\n"); +// if (YUV420(filter_mode) && fix_interlaced) +// FAIL("-K mode requires YUYV, sorry.\n"); filter_mode1 = filter_mode; @@ -430,6 +556,8 @@ int auto_size = 0; CLEAR(&vwin); vwin.width = par->width; + if (fix_interlaced==3) + vwin.width = vwin.width << 1; vwin.height = par->height; vwin.chromakey = -1; @@ -445,6 +573,8 @@ filter_mode = filter_mode1; par->height = vwin2.height; par->width = vwin2.width; + if (fix_interlaced==3) + par->width = par->width >> 1; continue; } FAIL("Failed to set the grab size of %s to %dx%d, " @@ -467,6 +597,8 @@ par->width = vwin.width; par->height = vwin.height; + if (fix_interlaced==3) + par->width = par->width >> 1; #endif } else { int r; @@ -479,6 +611,9 @@ if (gb_buffers.frames < 2) 
FAIL("Expected 2+ buffers from %s, got %d", cap_dev, gb_buffers.frames); + if ((gb_buffers.frames < 4)&&((fix_interlaced==1)||(fix_interlaced==3))) + FAIL("Pre-filter #%d need 4+ buffers from %s, got %d", fix_interlaced, + cap_dev, gb_buffers.frames); printv(2, "Mapping capture buffers\n"); @@ -502,9 +637,11 @@ CLEAR(&gb_buf); gb_buf.frame = 0; gb_buf.width = par->width; + if (fix_interlaced==3) + gb_buf.width = gb_buf.width << 1; gb_buf.height = par->height; - if (YUV420(filter_mode)) + if (YUV420(filter_mode) && (fix_interlaced!=3)) gb_buf.format = VIDEO_PALETTE_YUV420P; else gb_buf.format = VIDEO_PALETTE_YUYV; @@ -550,14 +687,25 @@ if (IOCTL(fd, VIDIOCGPICT, &vpict) == 0) { switch (vpict.palette) { case VIDEO_PALETTE_YUV420P: + if (!YUV420(filter_mode)) // XXX commit this. filter_mode = CM_YUV; + ASSERT("find suitable filter. Please try without -K option.\n", + fix_interlaced==0); break; case VIDEO_PALETTE_YUYV: case VIDEO_PALETTE_YUV422: + if (fix_interlaced==3) { + if (!YUV420(filter_mode)) + filter_mode = CM_YUV; + } else { + if (YUV420(filter_mode)) // XXX commit this. 
filter_mode = CM_YUYV; + } break; } } + if (fix_interlaced==3) + par->width = par->width >> 1; continue; } FAIL("Failed to start capturing (VIDIOCMCAPTURE) from %s, maybe the device doesn't\n" @@ -566,6 +714,8 @@ cap_dev, gb_buf.width, gb_buf.height); } } + + use_mmap_ext_time0 = current_time(); /* In case there are many buffers make full use of them */ for (gb_buf.frame=1; gb_buf.frame < gb_buffers.frames; gb_buf.frame++) ASSERT("queue remaining free buffers (VIDIOCMCAPTURE) of %s\n", @@ -573,18 +723,24 @@ cap_dev); par->width = gb_buf.width; + if (fix_interlaced==3) + par->width = par->width >> 1; par->height = gb_buf.height; } + if (fix_interlaced==3) { + if ((par->width != 384)&&(par->width != 360)&&(par->width != 320)) + FAIL("Fatal: pre-filter #3 is only available for 384, 360 and 320 frame width, sorry.\n"); + if (!YUV420(filter_mode)) + FAIL("Fatal: pre-filter #3 only works with 420p filters. Specify -F1 or a like.\n"); + } + if (width > par->width) width = par->width; if (height > par->height) height = par->height; - if (YUV420(filter_mode) && fix_interlaced) - FAIL("-K mode requires YUYV, sorry.\n"); - - if (fix_interlaced) + if ((fix_interlaced==1)||(fix_interlaced==2)) par->sample_aspect = video_sampling_aspect (par->frame_rate, par->width >> 1, par->height >> 1); else diff -ru rte-orig/mp1e/global_data.c rte/mp1e/global_data.c --- rte-orig/mp1e/global_data.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/global_data.c 2005-12-11 17:52:22.000000000 +0100 @@ -111,6 +111,10 @@ int aud_buffers = 32; // audio compression -> mux #endif +int source = 0; // v4l source +int halt = 0; // halt, if silence only +double sys_load[4]; + int cpu_type = 0; // detect /* Work-arounds for the AIW v4l driver */ diff -ru rte-orig/mp1e/main.c rte/mp1e/main.c --- rte-orig/mp1e/main.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/main.c 2005-12-11 17:52:22.000000000 +0100 @@ -19,6 +19,8 @@ /* $Id: main.c,v 1.42 2003/01/15 23:33:46 mschimek Exp $ */ +#define 
ANALOGTVPATCHLEVEL 9 + #include #include #include @@ -359,6 +361,8 @@ terminate(int signum) { double now; + FILE *f = fopen("/tmp/mp1e.log", "w"); + extern double sys_load[4]; printv(3, "Received termination signal\n"); @@ -375,6 +379,15 @@ // XXX allow cancelling } + if (f != (FILE *)NULL) { /* one-line run summary: patchlevel, bytes written, min/max/mean system load, frame count, dropped frames */ + fprintf(f, "%d %llu %f %f %f %llu %llu\n", + ANALOGTVPATCHLEVEL, + mux->status.bytes_out, + sys_load[0], sys_load[1], (sys_load[3] > 0.0) ? sys_load[2] / sys_load[3] : 0.0, /* mean load; 0 when no sample was taken yet, instead of 0/0 */ + video_frame_count, video_frames_dropped); + fclose(f); + } + printv(1, "\nStop at %f\n", now); } @@ -387,6 +400,7 @@ struct pcm_context *pcm = 0; struct filter_param fp[2]; char *errstr = NULL; + extern double sys_load[4]; #ifndef HAVE_PROGRAM_INVOCATION_NAME program_invocation_short_name = av[0] @@ -395,6 +409,13 @@ options(ac, av); + printv(1, "analogTV: mp1e [analogTV patchlevel #%d]\n", ANALOGTVPATCHLEVEL); + + sys_load[0] = 999.0; // min + sys_load[1] = 0.0; // max + sys_load[2] = 0.0; // sum + sys_load[3] = 0.0; // cnt + #if 0 if (mux_syn != 4) { FAIL("Temporarily out of order. No bug reports please."); Nur in rte/mp1e: main.c.orig. diff -ru rte-orig/mp1e/options.c rte/mp1e/options.c --- rte-orig/mp1e/options.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/options.c 2005-12-11 17:52:22.000000000 +0100 @@ -102,6 +102,8 @@ " -r line,vol Audio record source 1..30%s,\n" " volume 0..100 %d,%d\n" " -x name Audio mixer device (OSS API) %s\n" + " -d channel video source (2=S-Video)\n" + " -j halt, if only silence\n" " -M mode RF audio 0 = unmute, 1 = mute, 2 = ignore %s\n" "\n" " -i filename Source configuration file\n" @@ -127,7 +129,7 @@ exit((fi == stderr) ? 
EXIT_FAILURE : EXIT_SUCCESS); } -#define OPT_STR "02a:b:c:e:f:g:hi:k:lm:n:o:p:r:s:t:vwx:zA:C:B:F:G:H:I:J:KL:M:PR:S:T:VX:" +#define OPT_STR "02a:b:c:d:e:f:g:hi:jk:lm:n:o:p:r:s:t:vwx:zA:C:B:F:G:H:I:J:K:L:M:PR:S:T:VX:" static const struct option long_options[] = { @@ -136,11 +138,13 @@ { "audio_mode", required_argument, NULL, 'a' }, { "video_bit_rate", required_argument, NULL, 'b' }, { "capture_device", required_argument, NULL, 'c' }, + { "video_source", required_argument, NULL, 'd' }, { "skip_method", required_argument, NULL, 'e' }, { "frame_rate", required_argument, NULL, 'f' }, { "gop_sequence", required_argument, NULL, 'g' }, { "help", no_argument, NULL, 'h' }, { "config", required_argument, NULL, 'i' }, + { "halt", no_argument, NULL, 'j' }, /* flag only: "j" carries no ':' in OPT_STR */ { "break", required_argument, NULL, 'k' }, { "letterbox", no_argument, NULL, 'l' }, { "mux_mode", required_argument, NULL, 'm' }, @@ -163,7 +167,7 @@ { "frames_per_seq_header", required_argument, NULL, 'H' }, { "vbi_device", required_argument, NULL, 'I' }, { "source_fps", required_argument, NULL, 'J' }, /* AIW */ - { "half_rate", no_argument, NULL, 'K' }, /* AIW */ + { "half_rate", required_argument, NULL, 'K' }, /* AIW */ { "sample_aspect", required_argument, NULL, 'L' }, { "mute", required_argument, NULL, 'M' }, { "preview", required_argument, NULL, 'P' }, @@ -333,7 +337,7 @@ switch (c) { case '0': -#ifdef OPTIONS_M2I +#if 1 // def OPTIONS_M2I m2i = 1; #endif break; @@ -343,7 +347,7 @@ break; case 'a': - if ((audio_mode = suboption(audio_options, 4, audio_mode)) < 0) + if ((audio_mode = suboption(audio_options, 24, audio_mode)) < 0) return FALSE; /* * 0 = stereo, 1 = joint stereo, 2 = dual channel, 3 = mono; @@ -404,6 +408,16 @@ break; } + case 'd': + source = strtol(optarg, NULL, 0); + if (source < 0) + usage(stdout); + break; + + case 'j': + halt = 1; + break; /* without this, -j fell through into the -m handler below */ + case 'm': modules = multi_suboption(mux_options, 5, modules); if (modules <= 0 || modules > 7) @@ -585,7 +599,13 @@ break; case 'K': - fix_interlaced=1; + if (optarg) 
{ + fix_interlaced = strtol(optarg, NULL, 0); + if ((fix_interlaced < 1)||(fix_interlaced > 3)) + return FALSE; + } + else + fix_interlaced = 1; break; case 'L': diff -ru rte-orig/mp1e/options.h rte/mp1e/options.h --- rte-orig/mp1e/options.h 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/options.h 2005-12-11 17:52:22.000000000 +0100 @@ -83,6 +83,9 @@ extern int vid_buffers; extern int aud_buffers; +extern int source; +extern int halt; + extern int cpu_type; /* AIW hacks */ diff -ru rte-orig/mp1e/systems/mpeg1.c rte/mp1e/systems/mpeg1.c --- rte-orig/mp1e/systems/mpeg1.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/systems/mpeg1.c 2005-12-11 17:52:22.000000000 +0100 @@ -551,8 +551,18 @@ #ifdef VIDEO_FIFO_TEST extern double in_fifo_load, out_fifo_load; #endif - double system_load = 1.0 - get_idle(); + double system_load = 100 * (1.0 - get_idle()); int min, sec; + extern double sys_load[4]; + + if (system_load < sys_load[0]) + sys_load[0] = system_load; + + if ((system_load < 100.0) && (system_load > sys_load[1])) + sys_load[1] = system_load; + + sys_load[2] += system_load; + sys_load[3]++; sec = front_pts / SYSTEM_TICKS; min = sec / 60; @@ -563,13 +573,13 @@ " [V%2.1f:%2.1f]", min, sec, mux->status.bytes_out / (double)(1 << 20), - 100.0 * system_load, + system_load, in_fifo_load, out_fifo_load); #else printv(1, "%d:%02d (%.1f MB), system load %4.1f %%", min, sec, mux->status.bytes_out / (double)(1 << 20), - 100.0 * system_load); + system_load); #endif if (video_frames_dropped > 0) printv(1, ", %llu (%5.2f %%) dropped", diff -ru rte-orig/mp1e/video/filter_mmx.s rte/mp1e/video/filter_mmx.s --- rte-orig/mp1e/video/filter_mmx.s 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/video/filter_mmx.s 2005-12-11 17:52:22.000000000 +0100 @@ -3451,3 +3451,906 @@ popl %ebx popl %esi ret + +pfp_384_by1 = 384; +pfp_384_by4 = 1536; + +pfp_360_by1 = 360; +pfp_360_by4 = 1440; + +pfp_320_by1 = 320; +pfp_320_by4 = 1280; + +pfp_halfwidth = 0; // immediate +pfp_height = 4; // high 
word(ecx), decreases. +pfp_frame1 = 8; // esi +pfp_frame2 = 12; // edi +pfp_dest_y = 16; // ebx +pfp_dest_u = 20; // edx +pfp_dest_v = 24; // eax +// position within current line // cx +// + .text + .align 16 + .globl pre_filter_1 +pre_filter_1: + pushl %ebx; + pushl %esi; + pushl %edi; + + movl pfp_frame1(%eax),%esi; + movl pfp_frame2(%eax),%edi; + movl pfp_height(%eax),%ecx; + roll $16,%ecx; + movl pfp_dest_y(%eax),%ebx; + + movl pfp_halfwidth(%eax),%edx; + cmpl $384, %edx; + je pre_filter_1_384; + cmpl $360, %edx; + je pre_filter_1_360; + cmpl $320, %edx; + je pre_filter_1_320; + + popl %edi; + popl %esi; + popl %ebx; + ret + +pre_filter_1_384: + movl pfp_dest_u(%eax),%edx; + movl pfp_dest_v(%eax),%eax; + +pre_filter_1_384_1: + movw $pfp_384_by1, %cx; +pre_filter_1_384_2: + // #1 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_384_by4(%esi), %mm2; + movq pfp_384_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #1, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y1t Y1t 0 0 0 0 0 0 + // *** #1, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y1t Y1t 0 0 Y1b Y1b 0 0 + // *** #1, UV, top+bottom. 
+ psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + movq %mm0, %mm7; + + // #2 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_384_by4(%esi), %mm2; + movq pfp_384_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #2, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b 0 0 + // *** #2, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b Y2b Y2b + // *** #2, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? 
U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $8, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 0 0 V1 V2 0 0 + // Save YY for #1,#2 + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_384_by1(%ebx); + addl $4, %ebx; + + // #3 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_384_by4(%esi), %mm2; + movq pfp_384_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #3, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y1t Y1t 0 0 0 0 0 0 + // *** #3, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y1t Y1t 0 0 Y1b Y1b 0 0 + // *** #3, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $16, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 0 V1 V2 V3 0 + + // #4 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_384_by4(%esi), %mm2; + movq pfp_384_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #4, Y, top line. 
+ movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b 0 0 + // *** #4, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b Y2b Y2b + // *** #4, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $24, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 U4 V1 V2 V3 V4 + // Save YY for #3,#4 + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_384_by1(%ebx); + addl $4, %ebx; + + // Save UV... + movd %mm7, (%edx) + psrlq $32, %mm7; + add $4, %edx + movd %mm7, (%eax) + add $4, %eax + + // In-line loop... + subw $8, %cx; + jne pre_filter_1_384_2; + + // Line-by-line loop... 
+ addl $pfp_384_by4, %esi; + addl $pfp_384_by4, %edi; + addl $pfp_384_by1, %ebx; + roll $16, %ecx; + sub $2, %cx; + rorl $16, %ecx; + jne pre_filter_1_384_1; + + + popl %edi; + popl %esi; + popl %ebx; + ret + +pre_filter_1_360: + movl pfp_dest_u(%eax),%edx; + movl pfp_dest_v(%eax),%eax; + +pre_filter_1_360_1: + movw $pfp_360_by1, %cx; +pre_filter_1_360_2: + // #1 of 4 groups per pass: 8 bytes each from (%esi), (%edi) and from both at offset pfp_360_by4. + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_360_by4(%esi), %mm2; + movq pfp_360_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #1, Y, top line: low byte of each word of mm0/mm1 is kept, summed, then >> 2. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y1t Y1t 0 0 0 0 0 0 + // *** #1, Y, bottom line: same steps on mm2/mm3 (the pfp_360_by4 loads). + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y1t Y1t 0 0 Y1b Y1b 0 0 + // *** #1, UV, top+bottom: high byte of each word in all four loads is summed, then >> 3. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + movq %mm0, %mm7; + + // #2 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_360_by4(%esi), %mm2; + movq pfp_360_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #2, Y, top line.
+ movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b 0 0 + // *** #2, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b Y2b Y2b + // *** #2, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $8, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 0 0 V1 V2 0 0 + // Save YY for #1,#2: low dword of mm6 to (%ebx), high dword to pfp_360_by1(%ebx). + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_360_by1(%ebx); + addl $4, %ebx; + + // #3 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_360_by4(%esi), %mm2; + movq pfp_360_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #3, Y, top line.
+ movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y3t Y3t 0 0 0 0 0 0 + // *** #3, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y3t Y3t 0 0 Y3b Y3b 0 0 + // *** #3, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $16, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 0 V1 V2 V3 0 + + // #4 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_360_by4(%esi), %mm2; + movq pfp_360_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #4, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y3t Y3t Y4t Y4t Y3b Y3b 0 0 + // *** #4, Y, bottom line.
+ movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y3t Y3t Y4t Y4t Y3b Y3b Y4b Y4b + // *** #4, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $24, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 U4 V1 V2 V3 V4 + // Save YY for #3,#4: low dword of mm6 to (%ebx), high dword to pfp_360_by1(%ebx). + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_360_by1(%ebx); + addl $4, %ebx; + + // Save UV: low dword of mm7 (U1-U4) to (%edx), high dword (V1-V4) to (%eax). + movd %mm7, (%edx) + psrlq $32, %mm7; + add $4, %edx + movd %mm7, (%eax) + add $4, %eax + + // In-line loop: %cx was loaded with pfp_360_by1 and drops by 8 per pass. + subw $8, %cx; + jne pre_filter_1_360_2; + + // Line-by-line loop: the row counter sits in the upper half of %ecx and drops by 2; rorl leaves ZF untouched, so jne tests the sub result. + addl $pfp_360_by4, %esi; + addl $pfp_360_by4, %edi; + addl $pfp_360_by1, %ebx; + roll $16, %ecx; + sub $2, %cx; + rorl $16, %ecx; + jne pre_filter_1_360_1; + + + popl %edi; + popl %esi; + popl %ebx; + ret + +pre_filter_1_320: + movl pfp_dest_u(%eax),%edx; + movl pfp_dest_v(%eax),%eax; + +pre_filter_1_320_1: + movw $pfp_320_by1, %cx; +pre_filter_1_320_2: + // #1 of 4 groups per pass: 8 bytes each from (%esi), (%edi) and from both at offset pfp_320_by4. + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_320_by4(%esi), %mm2; + movq pfp_320_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #1, Y, top line: low byte of each word of mm0/mm1 is kept, summed, then >> 2.
+ movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y1t Y1t 0 0 0 0 0 0 + // *** #1, Y, bottom line: same steps on mm2/mm3 (the pfp_320_by4 loads). + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y1t Y1t 0 0 Y1b Y1b 0 0 + // *** #1, UV, top+bottom: high byte of each word in all four loads is summed, then >> 3. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + movq %mm0, %mm7; + + // #2 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_320_by4(%esi), %mm2; + movq pfp_320_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #2, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b 0 0 + // *** #2, Y, bottom line.
+ movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y1t Y1t Y2t Y2t Y1b Y1b Y2b Y2b + // *** #2, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $8, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 0 0 V1 V2 0 0 + // Save YY for #1,#2: low dword of mm6 to (%ebx), high dword to pfp_320_by1(%ebx). + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_320_by1(%ebx); + addl $4, %ebx; + + // #3 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_320_by4(%esi), %mm2; + movq pfp_320_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + + // *** #3, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $48, %mm4; + movq %mm4, %mm6; // mm6 bytes = Y3t Y3t 0 0 0 0 0 0 + // *** #3, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $16, %mm4; + por %mm4, %mm6; // mm6 = Y3t Y3t 0 0 Y3b Y3b 0 0 + // *** #3, UV, top+bottom.
+ psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $16, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 0 V1 V2 V3 0 + + // #4 + movq (%esi), %mm0; + movq (%edi), %mm1; + movq pfp_320_by4(%esi), %mm2; + movq pfp_320_by4(%edi), %mm3; + addl $8, %esi; + addl $8, %edi; + // *** #4, Y, top line. + movq %mm0, %mm4; + movq %mm1, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + psrlq $32, %mm4; + por %mm4, %mm6; // mm6 bytes = Y3t Y3t Y4t Y4t Y3b Y3b 0 0 + // *** #4, Y, bottom line. + movq %mm2, %mm4; + movq %mm3, %mm5; + psllw $8, %mm4; + psllw $8, %mm5; + psrlw $8, %mm4; + psrlw $8, %mm5; + paddw %mm5, %mm4; // mm4 = odd+even YYYY; + movq %mm4, %mm5; + psrld $16, %mm4; + paddw %mm5, %mm4; + psrlw $2,%mm4; + packuswb %mm4, %mm4; + psllw $8, %mm4; + psrlw $8, %mm4; + packuswb %mm4, %mm4; // mm4 bits 0-15 = YY + psllq $48, %mm4; + por %mm4, %mm6; // mm6 bytes = Y3t Y3t Y4t Y4t Y3b Y3b Y4b Y4b + // *** #4, UV, top+bottom. + psrlw $8, %mm0; + psrlw $8, %mm1; + psrlw $8, %mm2; + psrlw $8, %mm3; + paddw %mm1, %mm0; + paddw %mm3, %mm2; + paddw %mm2, %mm0; + punpckldq %mm0, %mm1; + paddw %mm1, %mm0; + psrlw $3, %mm0; // mm0 = ? ? ? ? U 0 V 0 + pxor %mm3, %mm3; + punpckhwd %mm3, %mm0; // mm0 = U 0 0 0 V 0 0 0 + pslld $24, %mm0; + por %mm0, %mm7; // mm7 = U1 U2 U3 U4 V1 V2 V3 V4 + // Save YY for #3,#4: low dword of mm6 to (%ebx), high dword to pfp_320_by1(%ebx). + movd %mm6, (%ebx); + psrlq $32, %mm6; + movd %mm6, pfp_320_by1(%ebx); + addl $4, %ebx; + + // Save UV: low dword of mm7 (U1-U4) to (%edx), high dword (V1-V4) to (%eax).
+ movd %mm7, (%edx) + psrlq $32, %mm7; + add $4, %edx + movd %mm7, (%eax) + add $4, %eax + + // In-line loop: %cx was loaded with pfp_320_by1 and drops by 8 per pass. + subw $8, %cx; + jne pre_filter_1_320_2; + + // Line-by-line loop: the row counter sits in the upper half of %ecx and drops by 2; rorl leaves ZF untouched, so jne tests the sub result. + addl $pfp_320_by4, %esi; + addl $pfp_320_by4, %edi; + addl $pfp_320_by1, %ebx; + roll $16, %ecx; + sub $2, %cx; + rorl $16, %ecx; + jne pre_filter_1_320_1; + + + popl %edi; + popl %esi; + popl %ebx; + ret diff -ru rte-orig/mp1e/video/mpeg1.c rte/mp1e/video/mpeg1.c --- rte-orig/mp1e/video/mpeg1.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/video/mpeg1.c 2005-12-11 17:52:22.000000000 +0100 @@ -2515,6 +2515,7 @@ if (mpeg1->motion_min && mpeg1->motion_max && mpeg1->rc.np > 0 && mpeg1->rc.nb > 0) { switch (cpu_detection()) { + case CPU_VIA_C3: case CPU_K6_2: case CPU_CYRIX_III: search = _3dn_search; diff -ru rte-orig/mp1e/video/mpeg2.c rte/mp1e/video/mpeg2.c --- rte-orig/mp1e/video/mpeg2.c 2005-12-11 17:52:09.000000000 +0100 +++ rte/mp1e/video/mpeg2.c 2005-12-11 17:52:22.000000000 +0100 @@ -41,7 +41,7 @@ #include "motion.h" #include "video.h" -#ifdef OPTIONS_M2I +#if 1 // def OPTIONS_M2I #define VARQ 65536.0 @@ -1341,6 +1341,7 @@ if (mpeg1->motion_min && mpeg1->motion_max && mpeg1->rc.np > 0 && mpeg1->rc.nb > 0) { switch (cpu_detection()) { + case CPU_VIA_C3: case CPU_K6_2: case CPU_CYRIX_III: search = _3dn_search;