00001
00002
00003
00004
00005
00006
00007 /*
00008 This program is free software; you can redistribute it and/or modify
00009 it under the terms of the GNU General Public License as published by
00010 the Free Software Foundation; either version 2 of the License, or
00011 (at your option) any later version.
00012
00013 This program is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016 GNU General Public License for more details.
00017
00018 You should have received a copy of the GNU General Public License
00019 along with this program; if not, write to the Free Software
00020 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00021
00022 This file is a modified version of RTjpeg 0.1.2, (C) Justin Schoeman 1998
00023 */
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include <sys/types.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include "rtjpeg_core.h"
00041
/*
 * Zig-zag scan order for an 8x8 block.
 *
 * Entry k is the index (0..63) into the 64-element coefficient array of the
 * k-th coefficient in stream order; RTjpeg_b2s() and RTjpeg_s2b() both walk
 * the block through this table.  The walk follows the anti-diagonals of the
 * block, starting 0, 8, 1, 2, 9, 16, ...
 */
static const unsigned char RTjpeg_ZZ[64] = {
     0,  8,  1,  2,  9, 16, 24, 17,
    10,  3,  4, 11, 18, 25, 32, 40,
    33, 26, 19, 12,  5,  6, 13, 20,
    27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14,  7, 15, 22, 29, 36,
    43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53,
    46, 39, 47, 54, 61, 62, 55, 63
};
00058
00059 static const __u64 RTjpeg_aan_tab[64]={
00060 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
00061 5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
00062 5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
00063 5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
00064 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
00065 3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
00066 2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
00067 1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
00068 };
00069
/*
 * Base quantiser step sizes for the luminance plane, one per coefficient
 * of an 8x8 block, stored row by row.
 */
static const unsigned char RTjpeg_lum_quant_tbl[64] =
{
    /* row 0 */  16,  11,  10,  16,  24,  40,  51,  61,
    /* row 1 */  12,  12,  14,  19,  26,  58,  60,  55,
    /* row 2 */  14,  13,  16,  24,  40,  57,  69,  56,
    /* row 3 */  14,  17,  22,  29,  51,  87,  80,  62,
    /* row 4 */  18,  22,  37,  56,  68, 109, 103,  77,
    /* row 5 */  24,  35,  55,  64,  81, 104, 113,  92,
    /* row 6 */  49,  64,  78,  87, 103, 121, 120, 101,
    /* row 7 */  72,  92,  95,  98, 112, 100, 103,  99
};
00080
/*
 * Base quantiser step sizes for the chrominance planes, one per coefficient
 * of an 8x8 block, stored row by row.  Everything outside the top-left
 * corner uses the same step of 99.
 */
static const unsigned char RTjpeg_chrom_quant_tbl[64] =
{
    /* row 0 */  17,  18,  24,  47,  99,  99,  99,  99,
    /* row 1 */  18,  21,  26,  66,  99,  99,  99,  99,
    /* row 2 */  24,  26,  56,  99,  99,  99,  99,  99,
    /* row 3 */  47,  66,  99,  99,  99,  99,  99,  99,
    /* row 4 */  99,  99,  99,  99,  99,  99,  99,  99,
    /* row 5 */  99,  99,  99,  99,  99,  99,  99,  99,
    /* row 6 */  99,  99,  99,  99,  99,  99,  99,  99,
    /* row 7 */  99,  99,  99,  99,  99,  99,  99,  99
};
00091
00092 int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
00093 {
00094 register int ci, co=1, tmp;
00095 register __s16 ZZvalue;
00096
00097 (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
00098
00099 for(ci=1; ci<=bt8; ci++)
00100 {
00101 ZZvalue = data[RTjpeg_ZZ[ci]];
00102
00103 if(ZZvalue>0)
00104 {
00105 strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue;
00106 }
00107 else
00108 {
00109 strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue;
00110 }
00111 }
00112
00113 for(; ci<64; ci++)
00114 {
00115 ZZvalue = data[RTjpeg_ZZ[ci]];
00116
00117 if(ZZvalue>0)
00118 {
00119 strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue;
00120 }
00121 else if(ZZvalue<0)
00122 {
00123 strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue;
00124 }
00125 else
00126 {
00127 tmp=ci;
00128 do
00129 {
00130 ci++;
00131 }
00132 while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0));
00133
00134 strm[co++]=(__s8)(63+(ci-tmp));
00135 ci--;
00136 }
00137 }
00138 return (int)co;
00139 }
00140
00141 int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
00142 {
00143 int ci=1, co=1, tmp;
00144 register int i;
00145
00146 i=RTjpeg_ZZ[0];
00147 data[i]=((__u8)strm[0])*qtbl[i];
00148
00149 for(co=1; co<=bt8; co++)
00150 {
00151 i=RTjpeg_ZZ[co];
00152 data[i]=strm[ci++]*qtbl[i];
00153 }
00154
00155 for(; co<64; co++)
00156 {
00157 if(strm[ci]>63)
00158 {
00159 tmp=co+strm[ci]-63;
00160 for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0;
00161 co--;
00162 } else
00163 {
00164 i=RTjpeg_ZZ[co];
00165 data[i]=strm[ci]*qtbl[i];
00166 }
00167 ci++;
00168 }
00169 return (int)ci;
00170 }
00171
00172 #if defined(USE_MMX)
00173 void RTjpeg_quant_init(void)
00174 {
00175 int i;
00176 __s16 *qtbl;
00177
00178 qtbl=(__s16 *)RTjpeg_lqt;
00179 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i];
00180
00181 qtbl=(__s16 *)RTjpeg_cqt;
00182 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];
00183 }
00184
00185 static mmx_t RTjpeg_ones=(mmx_t)(long long)0x0001000100010001LL;
00186 static mmx_t RTjpeg_half=(mmx_t)(long long)0x7fff7fff7fff7fffLL;
00187
00188 void RTjpeg_quant(__s16 *block, __s32 *qtbl)
00189 {
00190 int i;
00191 mmx_t *bl, *ql;
00192
00193 ql=(mmx_t *)qtbl;
00194 bl=(mmx_t *)block;
00195
00196 movq_m2r(RTjpeg_ones, mm6);
00197 movq_m2r(RTjpeg_half, mm7);
00198
00199 for(i=16; i; i--)
00200 {
00201 movq_m2r(*(ql++), mm0);
00202 movq_m2r(*bl, mm2);
00203 movq_r2r(mm0, mm1);
00204 movq_r2r(mm2, mm3);
00205
00206 punpcklwd_r2r(mm6, mm0);
00207 punpckhwd_r2r(mm6, mm1);
00208
00209 punpcklwd_r2r(mm7, mm2);
00210 punpckhwd_r2r(mm7, mm3);
00211
00212 pmaddwd_r2r(mm2, mm0);
00213 pmaddwd_r2r(mm3, mm1);
00214
00215 psrad_i2r(16, mm0);
00216 psrad_i2r(16, mm1);
00217
00218 packssdw_r2r(mm1, mm0);
00219
00220 movq_r2m(mm0, *(bl++));
00221
00222 }
00223 }
00224 #else
/*
 * RTjpeg_quant_init (non-MMX build) - intentionally empty.  The C
 * RTjpeg_quant() reads the quantiser table as passed in, so there is
 * nothing to prepare.
 */
void RTjpeg_quant_init(void)
{
    /* nothing to do */
}
00228
00229 void RTjpeg_quant(__s16 *block, __s32 *qtbl)
00230 {
00231 int i;
00232
00233 for(i=0; i<64; i++)
00234 block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16);
00235 }
00236 #endif
00237
00238
00239
00240
#ifdef USE_MMX
/*
 * Multipliers for the MMX forward DCT, each replicated into four 16-bit
 * lanes.  The values match 0.7071, 0.3827, 0.5412 and 1.3066 scaled by
 * 2^14 (for example 0x2D41 == 11585).
 */
static mmx_t RTjpeg_C4    = (mmx_t)(long long)0x2D412D412D412D41LL;
static mmx_t RTjpeg_C6    = (mmx_t)(long long)0x187E187E187E187ELL;
static mmx_t RTjpeg_C2mC6 = (mmx_t)(long long)0x22A322A322A322A3LL;
static mmx_t RTjpeg_C2pC6 = (mmx_t)(long long)0x539F539F539F539FLL;
static mmx_t RTjpeg_zero  = (mmx_t)(long long)0x0000000000000000LL;

#else

/* Multipliers for the C forward DCT: the value in the name times 256. */
#define FIX_0_382683433  ((__s32)  98)
#define FIX_0_541196100  ((__s32) 139)
#define FIX_0_707106781  ((__s32) 181)
#define FIX_1_306562965  ((__s32) 334)

/* Drop 8 (resp. 16) fractional bits with rounding, narrowing to 16 bits. */
#define DESCALE10(v)     ((__s16)(((v) + 128) >> 8))
#define DESCALE20(v)     ((__s16)(((v) + 32768) >> 16))

/* Plain product of a value and a fixed-point multiplier, as __s32. */
#define D_MULTIPLY(a, k) ((__s32)((a) * (k)))
#endif
00259
00260 void RTjpeg_dct_init(void)
00261 {
00262 int i;
00263
00264 for(i=0; i<64; i++)
00265 {
00266 RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]);
00267 RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]);
00268 }
00269 }
00270
00271 void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
00272 {
00273 #ifndef MMX
00274 __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00275 __s32 tmp10, tmp11, tmp12, tmp13;
00276 __s32 z1, z2, z3, z4, z5, z11, z13;
00277 __u8 *idataptr;
00278 __s16 *odataptr;
00279 __s32 *wsptr;
00280 int ctr;
00281
00282 idataptr = idata;
00283 wsptr = RTjpeg_ws;
00284 for (ctr = 7; ctr >= 0; ctr--) {
00285 tmp0 = idataptr[0] + idataptr[7];
00286 tmp7 = idataptr[0] - idataptr[7];
00287 tmp1 = idataptr[1] + idataptr[6];
00288 tmp6 = idataptr[1] - idataptr[6];
00289 tmp2 = idataptr[2] + idataptr[5];
00290 tmp5 = idataptr[2] - idataptr[5];
00291 tmp3 = idataptr[3] + idataptr[4];
00292 tmp4 = idataptr[3] - idataptr[4];
00293
00294 tmp10 = (tmp0 + tmp3);
00295 tmp13 = tmp0 - tmp3;
00296 tmp11 = (tmp1 + tmp2);
00297 tmp12 = tmp1 - tmp2;
00298
00299 wsptr[0] = (tmp10 + tmp11)<<8;
00300 wsptr[4] = (tmp10 - tmp11)<<8;
00301
00302 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00303 wsptr[2] = (tmp13<<8) + z1;
00304 wsptr[6] = (tmp13<<8) - z1;
00305
00306 tmp10 = tmp4 + tmp5;
00307 tmp11 = tmp5 + tmp6;
00308 tmp12 = tmp6 + tmp7;
00309
00310 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00311 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5;
00312 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5;
00313 z3 = D_MULTIPLY(tmp11, FIX_0_707106781);
00314
00315 z11 = (tmp7<<8) + z3;
00316 z13 = (tmp7<<8) - z3;
00317
00318 wsptr[5] = z13 + z2;
00319 wsptr[3] = z13 - z2;
00320 wsptr[1] = z11 + z4;
00321 wsptr[7] = z11 - z4;
00322
00323 idataptr += rskip<<3;
00324 wsptr += 8;
00325 }
00326
00327 wsptr = RTjpeg_ws;
00328 odataptr=odata;
00329 for (ctr = 7; ctr >= 0; ctr--) {
00330 tmp0 = wsptr[0] + wsptr[56];
00331 tmp7 = wsptr[0] - wsptr[56];
00332 tmp1 = wsptr[8] + wsptr[48];
00333 tmp6 = wsptr[8] - wsptr[48];
00334 tmp2 = wsptr[16] + wsptr[40];
00335 tmp5 = wsptr[16] - wsptr[40];
00336 tmp3 = wsptr[24] + wsptr[32];
00337 tmp4 = wsptr[24] - wsptr[32];
00338
00339 tmp10 = tmp0 + tmp3;
00340 tmp13 = tmp0 - tmp3;
00341 tmp11 = tmp1 + tmp2;
00342 tmp12 = tmp1 - tmp2;
00343
00344 odataptr[0] = DESCALE10(tmp10 + tmp11);
00345 odataptr[32] = DESCALE10(tmp10 - tmp11);
00346
00347 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00348 odataptr[16] = DESCALE20((tmp13<<8) + z1);
00349 odataptr[48] = DESCALE20((tmp13<<8) - z1);
00350
00351 tmp10 = tmp4 + tmp5;
00352 tmp11 = tmp5 + tmp6;
00353 tmp12 = tmp6 + tmp7;
00354
00355 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00356 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5;
00357 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5;
00358 z3 = D_MULTIPLY(tmp11, FIX_0_707106781);
00359
00360 z11 = (tmp7<<8) + z3;
00361 z13 = (tmp7<<8) - z3;
00362
00363 odataptr[40] = DESCALE20(z13 + z2);
00364 odataptr[24] = DESCALE20(z13 - z2);
00365 odataptr[8] = DESCALE20(z11 + z4);
00366 odataptr[56] = DESCALE20(z11 - z4);
00367
00368 odataptr++;
00369 wsptr++;
00370 }
00371 #else
00372 mmx_t tmp6, tmp7;
00373 register mmx_t *dataptr = (mmx_t *)odata;
00374 mmx_t *idata2 = (mmx_t *)idata;
00375
00376
00377
00378 movq_m2r(RTjpeg_zero, mm2);
00379
00380
00381 movq_m2r(*idata2, mm0);
00382 movq_r2r(mm0, mm1);
00383
00384 punpcklbw_r2r(mm2, mm0);
00385 movq_r2m(mm0, *(dataptr));
00386
00387 punpckhbw_r2r(mm2, mm1);
00388 movq_r2m(mm1, *(dataptr+1));
00389
00390 idata2 += rskip;
00391
00392 movq_m2r(*idata2, mm0);
00393 movq_r2r(mm0, mm1);
00394
00395 punpcklbw_r2r(mm2, mm0);
00396 movq_r2m(mm0, *(dataptr+2));
00397
00398 punpckhbw_r2r(mm2, mm1);
00399 movq_r2m(mm1, *(dataptr+3));
00400
00401 idata2 += rskip;
00402
00403 movq_m2r(*idata2, mm0);
00404 movq_r2r(mm0, mm1);
00405
00406 punpcklbw_r2r(mm2, mm0);
00407 movq_r2m(mm0, *(dataptr+4));
00408
00409 punpckhbw_r2r(mm2, mm1);
00410 movq_r2m(mm1, *(dataptr+5));
00411
00412 idata2 += rskip;
00413
00414 movq_m2r(*idata2, mm0);
00415 movq_r2r(mm0, mm1);
00416
00417 punpcklbw_r2r(mm2, mm0);
00418 movq_r2m(mm0, *(dataptr+6));
00419
00420 punpckhbw_r2r(mm2, mm1);
00421 movq_r2m(mm1, *(dataptr+7));
00422
00423 idata2 += rskip;
00424
00425 movq_m2r(*idata2, mm0);
00426 movq_r2r(mm0, mm1);
00427
00428 punpcklbw_r2r(mm2, mm0);
00429 movq_r2m(mm0, *(dataptr+8));
00430
00431 punpckhbw_r2r(mm2, mm1);
00432 movq_r2m(mm1, *(dataptr+9));
00433
00434 idata2 += rskip;
00435
00436 movq_m2r(*idata2, mm0);
00437 movq_r2r(mm0, mm1);
00438
00439 punpcklbw_r2r(mm2, mm0);
00440 movq_r2m(mm0, *(dataptr+10));
00441
00442 punpckhbw_r2r(mm2, mm1);
00443 movq_r2m(mm1, *(dataptr+11));
00444
00445 idata2 += rskip;
00446
00447 movq_m2r(*idata2, mm0);
00448 movq_r2r(mm0, mm1);
00449
00450 punpcklbw_r2r(mm2, mm0);
00451 movq_r2m(mm0, *(dataptr+12));
00452
00453 punpckhbw_r2r(mm2, mm1);
00454 movq_r2m(mm1, *(dataptr+13));
00455
00456 idata2 += rskip;
00457
00458 movq_m2r(*idata2, mm0);
00459 movq_r2r(mm0, mm1);
00460
00461 punpcklbw_r2r(mm2, mm0);
00462 movq_r2m(mm0, *(dataptr+14));
00463
00464 punpckhbw_r2r(mm2, mm1);
00465 movq_r2m(mm1, *(dataptr+15));
00466
00467
00468
00469 movq_m2r(*(dataptr+9), mm7);
00470
00471 movq_m2r(*(dataptr+13), mm6);
00472 movq_r2r(mm7, mm5);
00473
00474 punpcklwd_m2r(*(dataptr+11), mm7);
00475 movq_r2r(mm6, mm2);
00476
00477 punpcklwd_m2r(*(dataptr+15), mm6);
00478 movq_r2r(mm7, mm1);
00479
00480 movq_m2r(*(dataptr+11), mm3);
00481 punpckldq_r2r(mm6, mm7);
00482
00483 movq_m2r(*(dataptr+15), mm0);
00484 punpckhdq_r2r(mm6, mm1);
00485
00486 movq_r2m(mm7,*(dataptr+9));
00487 punpckhwd_r2r(mm3, mm5);
00488
00489 movq_r2m(mm1,*(dataptr+11));
00490 punpckhwd_r2r(mm0, mm2);
00491
00492 movq_r2r(mm5, mm1);
00493 punpckldq_r2r(mm2, mm5);
00494
00495 movq_m2r(*(dataptr+1), mm0);
00496 punpckhdq_r2r(mm2, mm1);
00497
00498 movq_r2m(mm5,*(dataptr+13));
00499
00500
00501
00502 movq_r2m(mm1, *(dataptr+15));
00503
00504 movq_m2r(*(dataptr+5), mm2);
00505 movq_r2r(mm0, mm6);
00506
00507 punpcklwd_m2r(*(dataptr+3), mm0);
00508 movq_r2r(mm2, mm7);
00509
00510 punpcklwd_m2r(*(dataptr+7), mm2);
00511 movq_r2r(mm0, mm4);
00512
00513
00514 movq_m2r(*(dataptr+8), mm1);
00515 punpckldq_r2r(mm2, mm0);
00516
00517 movq_m2r(*(dataptr+12), mm3);
00518 punpckhdq_r2r(mm2, mm4);
00519
00520 punpckhwd_m2r(*(dataptr+3), mm6);
00521 movq_r2r(mm1, mm2);
00522
00523 punpckhwd_m2r(*(dataptr+7), mm7);
00524 movq_r2r(mm6, mm5);
00525
00526 movq_r2m(mm0, *(dataptr+8));
00527 punpckhdq_r2r(mm7, mm5);
00528
00529 punpcklwd_m2r(*(dataptr+10), mm1);
00530 movq_r2r(mm3, mm0);
00531
00532 punpckhwd_m2r(*(dataptr+10), mm2);
00533
00534 movq_r2m(mm4, *(dataptr+10));
00535 punpckldq_r2r(mm7, mm6);
00536
00537 punpcklwd_m2r(*(dataptr+14), mm3);
00538 movq_r2r(mm1, mm4);
00539
00540 movq_r2m(mm6, *(dataptr+12));
00541 punpckldq_r2r(mm3, mm1);
00542
00543 punpckhwd_m2r(*(dataptr+14), mm0);
00544 movq_r2r(mm2, mm6);
00545
00546 movq_r2m(mm5, *(dataptr+14));
00547 punpckhdq_r2r(mm3, mm4);
00548
00549 movq_r2m(mm1, *(dataptr+1));
00550 punpckldq_r2r(mm0, mm2);
00551
00552 movq_r2m(mm4, *(dataptr+3));
00553 punpckhdq_r2r(mm0, mm6);
00554
00555 movq_r2m(mm2, *(dataptr+5));
00556
00557 movq_m2r(*dataptr, mm0);
00558
00559 movq_r2m(mm6, *(dataptr+7));
00560
00561
00562
00563
00564 movq_m2r(*(dataptr+4), mm7);
00565 movq_r2r(mm0, mm2);
00566
00567 punpcklwd_m2r(*(dataptr+2), mm0);
00568 movq_r2r(mm7, mm4);
00569
00570 punpcklwd_m2r(*(dataptr+6), mm7);
00571 movq_r2r(mm0, mm1);
00572
00573 movq_m2r(*(dataptr+2), mm6);
00574 punpckldq_r2r(mm7, mm0);
00575
00576 movq_m2r(*(dataptr+6), mm5);
00577 punpckhdq_r2r(mm7, mm1);
00578
00579 movq_r2r(mm0, mm7);
00580 punpckhwd_r2r(mm6, mm2);
00581
00582 psubw_m2r(*(dataptr+14), mm7); */
00583 movq_r2r(mm1, mm6);
00584
00585 paddw_m2r(*(dataptr+14), mm0); */
00586 punpckhwd_r2r(mm5, mm4);
00587
00588 paddw_m2r(*(dataptr+12), mm1); */
00589 movq_r2r(mm2, mm3);
00590
00591 psubw_m2r(*(dataptr+12), mm6); */
00592 punpckldq_r2r(mm4, mm2);
00593
00594 movq_r2m(mm7, tmp7);
00595 movq_r2r(mm2, mm5);
00596
00597 movq_r2m(mm6, tmp6);
00598 punpckhdq_r2r(mm4, mm3);
00599
00600 paddw_m2r(*(dataptr+10), mm2); */
00601 movq_r2r(mm3, mm4);
00602
00603
00604
00605
00606
00607
00608 paddw_m2r(*(dataptr+8), mm3); */
00609 movq_r2r(mm0, mm7);
00610
00611 psubw_m2r(*(dataptr+8), mm4); */
00612 movq_r2r(mm1, mm6);
00613
00614 paddw_r2r(mm3, mm0); */
00615 psubw_r2r(mm3, mm7); */
00616
00617 psubw_r2r(mm2, mm6); */
00618 paddw_r2r(mm2, mm1); */
00619
00620 psubw_m2r(*(dataptr+10), mm5); */
00621 paddw_r2r(mm7, mm6);
00622
00623
00624
00625 movq_m2r(tmp6, mm2);
00626 movq_r2r(mm0, mm3);
00627
00628 psllw_i2r(2, mm6);
00629 paddw_r2r(mm1, mm0);
00630
00631 pmulhw_m2r(RTjpeg_C4, mm6);
00632 psubw_r2r(mm1, mm3);
00633
00634 movq_r2m(mm0, *dataptr);
00635 movq_r2r(mm7, mm0);
00636
00637
00638 movq_r2m(mm3, *(dataptr+8));
00639 paddw_r2r(mm5, mm4);
00640
00641 movq_m2r(tmp7, mm3);
00642 paddw_r2r(mm6, mm0);
00643
00644 paddw_r2r(mm2, mm5);
00645 psubw_r2r(mm6, mm7);
00646
00647 movq_r2m(mm0, *(dataptr+4));
00648 paddw_r2r(mm3, mm2);
00649
00650
00651
00652 movq_r2m(mm7, *(dataptr+12));
00653 movq_r2r(mm4, mm1);
00654
00655 psubw_r2r(mm2, mm1);
00656 psllw_i2r(2, mm4);
00657
00658 movq_m2r(RTjpeg_C2mC6, mm0);
00659 psllw_i2r(2, mm1);
00660
00661 pmulhw_m2r(RTjpeg_C6, mm1);
00662 psllw_i2r(2, mm2);
00663
00664 pmulhw_r2r(mm0, mm4);
00665
00666
00667
00668 pmulhw_m2r(RTjpeg_C2pC6, mm2);
00669 psllw_i2r(2, mm5);
00670
00671 pmulhw_m2r(RTjpeg_C4, mm5);
00672 movq_r2r(mm3, mm0);
00673
00674 movq_m2r(*(dataptr+1), mm7);
00675 paddw_r2r(mm1, mm4);
00676
00677 paddw_r2r(mm1, mm2);
00678
00679 paddw_r2r(mm5, mm0);
00680 psubw_r2r(mm5, mm3);
00681
00682
00683
00684 movq_r2r(mm3, mm5);
00685 psubw_r2r(mm4, mm3);
00686
00687 paddw_r2r(mm4, mm5);
00688 movq_r2r(mm0, mm6);
00689
00690 movq_r2m(mm3, *(dataptr+6));
00691 psubw_r2r(mm2, mm0);
00692
00693 movq_r2m(mm5, *(dataptr+10));
00694 paddw_r2r(mm2, mm6);
00695
00696 movq_r2m(mm0, *(dataptr+14));
00697
00698
00699
00700
00701
00702 movq_m2r(*(dataptr+3), mm1); */
00703 movq_r2r(mm7, mm0);
00704
00705 movq_r2m(mm6, *(dataptr+2));
00706
00707 movq_m2r(*(dataptr+5), mm2); */
00708 movq_r2r(mm1, mm6);
00709
00710 paddw_m2r(*(dataptr+15), mm0);
00711
00712 movq_m2r(*(dataptr+7), mm3); */
00713 movq_r2r(mm2, mm5);
00714
00715 psubw_m2r(*(dataptr+15), mm7);
00716 movq_r2r(mm3, mm4);
00717
00718 paddw_m2r(*(dataptr+13), mm1);
00719
00720 movq_r2m(mm7, tmp7);
00721 movq_r2r(mm0, mm7);
00722
00723 psubw_m2r(*(dataptr+13), mm6);
00724
00725
00726
00727 paddw_m2r(*(dataptr+9), mm3);
00728
00729 movq_r2m(mm6, tmp6);
00730 movq_r2r(mm1, mm6);
00731
00732 paddw_m2r(*(dataptr+11), mm2);
00733 paddw_r2r(mm3, mm0);
00734
00735 psubw_r2r(mm3, mm7);
00736
00737 psubw_m2r(*(dataptr+9), mm4);
00738 psubw_r2r(mm2, mm6);
00739
00740 paddw_r2r(mm2, mm1);
00741
00742 psubw_m2r(*(dataptr+11), mm5);
00743 paddw_r2r(mm7, mm6);
00744
00745
00746
00747 movq_m2r(tmp6, mm2);
00748 movq_r2r(mm0, mm3);
00749
00750 psllw_i2r(2, mm6);
00751 paddw_r2r(mm1, mm0);
00752
00753 pmulhw_m2r(RTjpeg_C4, mm6);
00754 psubw_r2r(mm1, mm3);
00755
00756 movq_r2m(mm0, *(dataptr+1));
00757 movq_r2r(mm7, mm0);
00758
00759
00760
00761 movq_r2m(mm3, *(dataptr+9));
00762 paddw_r2r(mm5, mm4);
00763
00764 movq_m2r(tmp7, mm3);
00765 paddw_r2r(mm6, mm0);
00766
00767 paddw_r2r(mm2, mm5);
00768 psubw_r2r(mm6, mm7);
00769
00770 movq_r2m(mm0, *(dataptr+5));
00771 paddw_r2r(mm3, mm2);
00772
00773
00774
00775 movq_r2m(mm7, *(dataptr+13));
00776 movq_r2r(mm4, mm1);
00777
00778 psubw_r2r(mm2, mm1);
00779 psllw_i2r(2, mm4);
00780
00781 movq_m2r(RTjpeg_C2mC6, mm0);
00782 psllw_i2r(2, mm1);
00783
00784 pmulhw_m2r(RTjpeg_C6, mm1);
00785 psllw_i2r(2, mm5);
00786
00787 pmulhw_r2r(mm0, mm4);
00788
00789
00790
00791 pmulhw_m2r(RTjpeg_C4, mm5);
00792 psllw_i2r(2, mm2);
00793
00794 pmulhw_m2r(RTjpeg_C2pC6, mm2);
00795 movq_r2r(mm3, mm0);
00796
00797 movq_m2r(*(dataptr+9), mm7);
00798 paddw_r2r(mm1, mm4);
00799
00800 paddw_r2r(mm5, mm0);
00801 psubw_r2r(mm5, mm3);
00802
00803
00804
00805 movq_r2r(mm3, mm5);
00806 paddw_r2r(mm1, mm2);
00807
00808 movq_r2r(mm0, mm6);
00809 psubw_r2r(mm4, mm5);
00810
00811 paddw_r2r(mm2, mm6);
00812 paddw_r2r(mm4, mm3);
00813
00814 movq_r2m(mm5, *(dataptr+7));
00815
00816 movq_r2m(mm6, *(dataptr+3));
00817 psubw_r2r(mm2, mm0);
00818
00819
00820
00821
00822
00823 movq_m2r(*(dataptr+13), mm6);
00824 movq_r2r(mm7, mm5);
00825
00826 punpcklwd_r2r(mm3, mm7);
00827 movq_r2r(mm6, mm2);
00828
00829 punpcklwd_r2r(mm0, mm6);
00830 movq_r2r(mm7, mm1);
00831
00832 punpckldq_r2r(mm6, mm7);
00833
00834 punpckhdq_r2r(mm6, mm1);
00835
00836 movq_r2m(mm7, *(dataptr+9));
00837 punpckhwd_r2r(mm3, mm5);
00838
00839 movq_r2m(mm1, *(dataptr+11));
00840 punpckhwd_r2r(mm0, mm2);
00841
00842 movq_r2r(mm5, mm1);
00843 punpckldq_r2r(mm2, mm5);
00844
00845 movq_m2r(*(dataptr+1), mm0);
00846 punpckhdq_r2r(mm2, mm1);
00847
00848 movq_r2m(mm5, *(dataptr+13));
00849
00850
00851
00852 movq_r2m(mm1, *(dataptr+15));
00853
00854 movq_m2r(*(dataptr+5), mm2);
00855 movq_r2r(mm0, mm6);
00856
00857 punpcklwd_m2r(*(dataptr+3), mm0);
00858 movq_r2r(mm2, mm7);
00859
00860 punpcklwd_m2r(*(dataptr+7), mm2);
00861 movq_r2r(mm0, mm4);
00862
00863
00864
00865 movq_m2r(*(dataptr+8), mm1);
00866 punpckldq_r2r(mm2, mm0);
00867
00868 movq_m2r(*(dataptr+12), mm3);
00869 punpckhdq_r2r(mm2, mm4);
00870
00871 punpckhwd_m2r(*(dataptr+3), mm6);
00872 movq_r2r(mm1, mm2);
00873
00874 punpckhwd_m2r(*(dataptr+7), mm7);
00875 movq_r2r(mm6, mm5);
00876
00877 movq_r2m(mm0, *(dataptr+8));
00878 punpckhdq_r2r(mm7, mm5);
00879
00880 punpcklwd_m2r(*(dataptr+10), mm1);
00881 movq_r2r(mm3, mm0);
00882
00883 punpckhwd_m2r(*(dataptr+10), mm2);
00884
00885 movq_r2m(mm4, *(dataptr+10));
00886 punpckldq_r2r(mm7, mm6);
00887
00888 punpcklwd_m2r(*(dataptr+14), mm3);
00889 movq_r2r(mm1, mm4);
00890
00891 movq_r2m(mm6, *(dataptr+12));
00892 punpckldq_r2r(mm3, mm1);
00893
00894 punpckhwd_m2r(*(dataptr+14), mm0);
00895 movq_r2r(mm2, mm6);
00896
00897 movq_r2m(mm5, *(dataptr+14));
00898 punpckhdq_r2r(mm3, mm4);
00899
00900 movq_r2m(mm1, *(dataptr+1));
00901 punpckldq_r2r(mm0, mm2);
00902
00903 movq_r2m(mm4, *(dataptr+3));
00904 punpckhdq_r2r(mm0, mm6);
00905
00906 movq_r2m(mm2, *(dataptr+5));
00907
00908 movq_m2r(*dataptr, mm0);
00909
00910 movq_r2m(mm6, *(dataptr+7));
00911
00912
00913
00914 movq_m2r(*(dataptr+4), mm7);
00915 movq_r2r(mm0, mm2);
00916
00917 punpcklwd_m2r(*(dataptr+2), mm0);
00918 movq_r2r(mm7, mm4);
00919
00920 punpcklwd_m2r(*(dataptr+6), mm7);
00921 movq_r2r(mm0, mm1);
00922
00923 movq_m2r(*(dataptr+2), mm6);
00924 punpckldq_r2r(mm7, mm0);
00925
00926 movq_m2r(*(dataptr+6), mm5);
00927 punpckhdq_r2r(mm7, mm1);
00928
00929 movq_r2r(mm0, mm7);
00930 punpckhwd_r2r(mm6, mm2);
00931
00932 psubw_m2r(*(dataptr+14), mm7); */
00933 movq_r2r(mm1, mm6);
00934
00935 paddw_m2r(*(dataptr+14), mm0); */
00936 punpckhwd_r2r(mm5, mm4);
00937
00938 paddw_m2r(*(dataptr+12), mm1); */
00939 movq_r2r(mm2, mm3);
00940
00941 psubw_m2r(*(dataptr+12), mm6); */
00942 punpckldq_r2r(mm4, mm2);
00943
00944 movq_r2m(mm7, tmp7);
00945 movq_r2r(mm2, mm5);
00946
00947 movq_r2m(mm6, tmp6);
00948
00949 punpckhdq_r2r(mm4, mm3);
00950
00951 paddw_m2r(*(dataptr+10), mm2); */
00952 movq_r2r(mm3, mm4);
00953
00954
00955
00956
00957
00958 paddw_m2r(*(dataptr+8), mm3); */
00959 movq_r2r(mm0, mm7);
00960
00961 psubw_m2r(*(dataptr+8), mm4); */
00962 movq_r2r(mm1, mm6);
00963
00964 paddw_r2r(mm3, mm0); */
00965 psubw_r2r(mm3, mm7); */
00966
00967 psubw_r2r(mm2, mm6); */
00968 paddw_r2r(mm2, mm1); */
00969
00970 psubw_m2r(*(dataptr+10), mm5); */
00971 paddw_r2r(mm7, mm6);
00972
00973
00974
00975 movq_m2r(tmp6, mm2);
00976 movq_r2r(mm0, mm3);
00977
00978 psllw_i2r(2, mm6);
00979 paddw_r2r(mm1, mm0);
00980
00981 pmulhw_m2r(RTjpeg_C4, mm6);
00982 psubw_r2r(mm1, mm3);
00983
00984 movq_r2m(mm0, *dataptr);
00985 movq_r2r(mm7, mm0);
00986
00987
00988 movq_r2m(mm3, *(dataptr+8));
00989 paddw_r2r(mm5, mm4);
00990
00991 movq_m2r(tmp7, mm3);
00992 paddw_r2r(mm6, mm0);
00993
00994 paddw_r2r(mm2, mm5);
00995 psubw_r2r(mm6, mm7);
00996
00997 movq_r2m(mm0, *(dataptr+4));
00998 paddw_r2r(mm3, mm2);
00999
01000
01001 movq_r2m(mm7, *(dataptr+12));
01002 movq_r2r(mm4, mm1);
01003
01004 psubw_r2r(mm2, mm1);
01005 psllw_i2r(2, mm4);
01006
01007 movq_m2r(RTjpeg_C2mC6, mm0);
01008 psllw_i2r(2, mm1);
01009
01010 pmulhw_m2r(RTjpeg_C6, mm1);
01011 psllw_i2r(2, mm2);
01012
01013 pmulhw_r2r(mm0, mm4);
01014
01015
01016
01017 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01018 psllw_i2r(2, mm5);
01019
01020 pmulhw_m2r(RTjpeg_C4, mm5);
01021 movq_r2r(mm3, mm0);
01022
01023 movq_m2r(*(dataptr+1), mm7);
01024 paddw_r2r(mm1, mm4);
01025
01026 paddw_r2r(mm1, mm2);
01027
01028 paddw_r2r(mm5, mm0);
01029 psubw_r2r(mm5, mm3);
01030
01031
01032
01033 movq_r2r(mm3, mm5);
01034 psubw_r2r(mm4, mm3);
01035
01036 paddw_r2r(mm4, mm5);
01037 movq_r2r(mm0, mm6);
01038
01039 movq_r2m(mm3, *(dataptr+6));
01040 psubw_r2r(mm2, mm0);
01041
01042 movq_r2m(mm5, *(dataptr+10));
01043 paddw_r2r(mm2, mm6);
01044
01045 movq_r2m(mm0, *(dataptr+14));
01046
01047
01048
01049
01050
01051 movq_m2r(*(dataptr+3), mm1); */
01052 movq_r2r(mm7, mm0);
01053
01054 movq_r2m(mm6, *(dataptr+2));
01055
01056 movq_m2r(*(dataptr+5), mm2); */
01057 movq_r2r(mm1, mm6);
01058
01059 paddw_m2r(*(dataptr+15), mm0);
01060
01061 movq_m2r(*(dataptr+7), mm3); */
01062 movq_r2r(mm2, mm5);
01063
01064 psubw_m2r(*(dataptr+15), mm7);
01065 movq_r2r(mm3, mm4);
01066
01067 paddw_m2r(*(dataptr+13), mm1);
01068
01069 movq_r2m(mm7, tmp7);
01070 movq_r2r(mm0, mm7);
01071
01072 psubw_m2r(*(dataptr+13), mm6);
01073
01074
01075
01076 paddw_m2r(*(dataptr+9), mm3);
01077
01078 movq_r2m(mm6, tmp6);
01079 movq_r2r(mm1, mm6);
01080
01081 paddw_m2r(*(dataptr+11), mm2);
01082 paddw_r2r(mm3, mm0);
01083
01084 psubw_r2r(mm3, mm7);
01085
01086 psubw_m2r(*(dataptr+9), mm4);
01087 psubw_r2r(mm2, mm6);
01088
01089 paddw_r2r(mm2, mm1);
01090
01091 psubw_m2r(*(dataptr+11), mm5);
01092 paddw_r2r(mm7, mm6);
01093
01094
01095
01096 movq_m2r(tmp6, mm2);
01097 movq_r2r(mm0, mm3);
01098
01099 psllw_i2r(2, mm6);
01100 paddw_r2r(mm1, mm0);
01101
01102 pmulhw_m2r(RTjpeg_C4, mm6);
01103 psubw_r2r(mm1, mm3);
01104
01105 movq_r2m(mm0, *(dataptr+1));
01106 movq_r2r(mm7, mm0);
01107
01108
01109
01110 movq_r2m(mm3, *(dataptr+9));
01111 paddw_r2r(mm5, mm4);
01112
01113 movq_m2r(tmp7, mm3);
01114 paddw_r2r(mm6, mm0);
01115
01116 paddw_r2r(mm2, mm5);
01117 psubw_r2r(mm6, mm7);
01118
01119 movq_r2m(mm0, *(dataptr+5));
01120 paddw_r2r(mm3, mm2);
01121
01122
01123
01124 movq_r2m(mm7, *(dataptr+13));
01125 movq_r2r(mm4, mm1);
01126
01127 psubw_r2r(mm2, mm1);
01128 psllw_i2r(2, mm4);
01129
01130 movq_m2r(RTjpeg_C2mC6, mm0);
01131 psllw_i2r(2, mm1);
01132
01133 pmulhw_m2r(RTjpeg_C6, mm1);
01134 psllw_i2r(2, mm5);
01135
01136 pmulhw_r2r(mm0, mm4);
01137
01138
01139
01140 pmulhw_m2r(RTjpeg_C4, mm5);
01141 psllw_i2r(2, mm2);
01142
01143 pmulhw_m2r(RTjpeg_C2pC6, mm2);
01144 movq_r2r(mm3, mm0);
01145
01146 movq_m2r(*(dataptr+9), mm7);
01147 paddw_r2r(mm1, mm4);
01148
01149 paddw_r2r(mm5, mm0);
01150 psubw_r2r(mm5, mm3);
01151
01152
01153
01154 movq_r2r(mm3, mm5);
01155 paddw_r2r(mm1, mm2);
01156
01157 movq_r2r(mm0, mm6);
01158 psubw_r2r(mm4, mm5);
01159
01160 paddw_r2r(mm2, mm6);
01161 paddw_r2r(mm4, mm3);
01162
01163 movq_r2m(mm5, *(dataptr+7));
01164 psubw_r2r(mm2, mm0);
01165
01166 movq_r2m(mm3, *(dataptr+11));
01167
01168 movq_r2m(mm6, *(dataptr+3));
01169
01170 movq_r2m(mm0, *(dataptr+15));
01171
01172
01173 #endif
01174 }
01175
/* Multipliers for the inverse DCT: the value in the name times 256. */
#define FIX_1_082392200  ((__s32) 277)
#define FIX_1_414213562  ((__s32) 362)
#define FIX_1_847759065  ((__s32) 473)
#define FIX_2_613125930  ((__s32) 669)

/* Drop 3 fractional bits with rounding, narrowing to 16 bits. */
#define DESCALE(x) ((__s16)(((x) + 4) >> 3))

/*
 * Clamp x to the range 16..235.
 *
 * The whole expansion is parenthesised: the previous definition expanded
 * to a bare conditional, so expressions such as "2 * RL(v)" or
 * "RL(v) - k" were parsed with the operator bound to only part of it.
 * x is evaluated more than once; do not pass an argument with side
 * effects.
 */
#define RL(x) (((x) > 235) ? 235 : (((x) < 16) ? 16 : (x)))

/*
 * Product of a value and a multiplier carrying 8 fractional bits, rounded
 * and scaled back down.  The second parameter was previously named after
 * the keyword "const"; the expansion is unchanged.
 */
#define MULTIPLY(var, mult) (((__s32) ((var) * (mult)) + 128) >> 8)
01187
01188 void RTjpeg_idct_init(void)
01189 {
01190 int i;
01191
01192 for(i=0; i<64; i++)
01193 {
01194 RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32;
01195 RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32;
01196 }
01197 }
01198
01199 void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip)
01200 {
01201 #ifdef USE_MMX
01202
01203 static mmx_t fix_141 = (mmx_t)(long long)0x5a825a825a825a82LL;
01204 static mmx_t fix_184n261 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
01205 static mmx_t fix_184 = (mmx_t)(long long)0x7641764176417641LL;
01206 static mmx_t fix_n184 = (mmx_t)(long long)0x896f896f896f896fLL;
01207 static mmx_t fix_108n184 = (mmx_t)(long long)0xcf04cf04cf04cf04LL;
01208
01209 mmx_t workspace[64];
01210 mmx_t *wsptr = workspace;
01211 register mmx_t *dataptr = (mmx_t *)odata;
01212 mmx_t *idata = (mmx_t *)data;
01213
01214 rskip = rskip>>3;
01215
01216
01217
01218
01219
01220
01221 movq_m2r(*(idata+10), mm1);
01222
01223 movq_m2r(*(idata+6), mm0);
01224
01225 movq_m2r(*(idata+2), mm3);
01226
01227 movq_r2r(mm1, mm2); */
01228
01229 movq_m2r(*(idata+14), mm4);
01230
01231 paddw_r2r(mm0, mm1);
01232
01233 psubw_r2r(mm0, mm2);
01234
01235 psllw_i2r(2, mm2);
01236 movq_r2r(mm2, mm0);
01237
01238 pmulhw_m2r(fix_184n261, mm2); */
01239 movq_r2r(mm3, mm5);
01240
01241 pmulhw_m2r(fix_n184, mm0); */
01242 paddw_r2r(mm4, mm3);
01243
01244 movq_r2r(mm3, mm6); */
01245 psubw_r2r(mm4, mm5);
01246
01247 psubw_r2r(mm1, mm6);
01248 psllw_i2r(2, mm5);
01249
01250 movq_m2r(*(idata+12), mm4);
01251 movq_r2r(mm5, mm7);
01252
01253 pmulhw_m2r(fix_108n184, mm5); even part */
01254 paddw_r2r(mm1, mm3);
01255
01256
01257
01258
01259 pmulhw_m2r(fix_184, mm7); */
01260 psllw_i2r(2, mm6);
01261
01262 movq_m2r(*(idata+4), mm1);
01263
01264 paddw_r2r(mm5, mm0);
01265
01266 paddw_r2r(mm7, mm2);
01267
01268 pmulhw_m2r(fix_141, mm6); */
01269 psubw_r2r(mm3, mm2);
01270
01271 movq_r2r(mm1, mm5);
01272 paddw_r2r(mm4, mm1); */
01273
01274 psubw_r2r(mm4, mm5);
01275 psubw_r2r(mm2, mm6);
01276
01277 movq_r2m(mm1, *(wsptr));
01278 psllw_i2r(2, mm5);
01279
01280 movq_m2r(*(idata), mm7);
01281
01282 pmulhw_m2r(fix_141, mm5);
01283 paddw_r2r(mm6, mm0);
01284
01285 movq_m2r(*(idata+8), mm4);
01286
01287 psubw_r2r(mm1, mm5); */
01288
01289 movq_r2m(mm0, *(wsptr+4));
01290 movq_r2r(mm7, mm1); */
01291
01292 movq_r2m(mm5, *(wsptr+2));
01293 psubw_r2r(mm4, mm1);
01294
01295 paddw_r2r(mm4, mm7);
01296 movq_r2r(mm1, mm5);
01297
01298 paddw_m2r(*(wsptr+2), mm1);
01299 movq_r2r(mm7, mm4); */
01300
01301 paddw_m2r(*(wsptr), mm7);
01302
01303 psubw_m2r(*(wsptr), mm4);
01304 movq_r2r(mm7, mm0);
01305
01306 psubw_m2r(*(wsptr+2), mm5);
01307 paddw_r2r(mm3, mm7);
01308
01309 psubw_r2r(mm3, mm0);
01310
01311 movq_r2m(mm7, *(wsptr));
01312 movq_r2r(mm1, mm3);
01313
01314 movq_r2m(mm0, *(wsptr+14));
01315 paddw_r2r(mm2, mm1);
01316
01317 psubw_r2r(mm2, mm3);
01318
01319 movq_r2m(mm1, *(wsptr+2));
01320 movq_r2r(mm4, mm1);
01321
01322 movq_r2m(mm3, *(wsptr+12));
01323
01324 paddw_m2r(*(wsptr+4), mm4);
01325
01326 psubw_m2r(*(wsptr+4), mm1);
01327
01328 movq_r2m(mm4, *(wsptr+8));
01329 movq_r2r(mm5, mm7);
01330
01331 paddw_r2r(mm6, mm5);
01332
01333 movq_r2m(mm1, *(wsptr+6));
01334 psubw_r2r(mm6, mm7);
01335
01336 movq_r2m(mm5, *(wsptr+4));
01337
01338 movq_r2m(mm7, *(wsptr+10));
01339
01340
01341
01342
01343
01344
01345 idata++;
01346 wsptr++;
01347
01348
01349
01350 movq_m2r(*(idata+10), mm1);
01351
01352 movq_m2r(*(idata+6), mm0);
01353
01354 movq_m2r(*(idata+2), mm3);
01355 movq_r2r(mm1, mm2); */
01356
01357 movq_m2r(*(idata+14), mm4);
01358 paddw_r2r(mm0, mm1);
01359
01360 psubw_r2r(mm0, mm2);
01361
01362 psllw_i2r(2, mm2);
01363 movq_r2r(mm2, mm0);
01364
01365 pmulhw_m2r(fix_184n261, mm2); */
01366 movq_r2r(mm3, mm5);
01367
01368 pmulhw_m2r(fix_n184, mm0); */
01369 paddw_r2r(mm4, mm3);
01370
01371 movq_r2r(mm3, mm6); */
01372 psubw_r2r(mm4, mm5);
01373
01374 psubw_r2r(mm1, mm6);
01375 psllw_i2r(2, mm5);
01376
01377 movq_m2r(*(idata+12), mm4);
01378 movq_r2r(mm5, mm7);
01379
01380 pmulhw_m2r(fix_108n184, mm5); even part */
01381 paddw_r2r(mm1, mm3);
01382
01383
01384
01385
01386 pmulhw_m2r(fix_184, mm7); */
01387 psllw_i2r(2, mm6);
01388
01389 movq_m2r(*(idata+4), mm1);
01390
01391 paddw_r2r(mm5, mm0);
01392
01393 paddw_r2r(mm7, mm2);
01394
01395 pmulhw_m2r(fix_141, mm6); */
01396 psubw_r2r(mm3, mm2);
01397
01398 movq_r2r(mm1, mm5);
01399 paddw_r2r(mm4, mm1); */
01400
01401 psubw_r2r(mm4, mm5);
01402 psubw_r2r(mm2, mm6);
01403
01404 movq_r2m(mm1, *(wsptr));
01405 psllw_i2r(2, mm5);
01406
01407 movq_m2r(*(idata), mm7);
01408 paddw_r2r(mm6, mm0);
01409
01410 pmulhw_m2r(fix_141, mm5);
01411
01412 movq_m2r(*(idata+8), mm4);
01413
01414 psubw_r2r(mm1, mm5); */
01415
01416 movq_r2m(mm0, *(wsptr+4));
01417 movq_r2r(mm7, mm1); */
01418
01419 movq_r2m(mm5, *(wsptr+2));
01420 psubw_r2r(mm4, mm1);
01421
01422 paddw_r2r(mm4, mm7);
01423 movq_r2r(mm1, mm5);
01424
01425 paddw_m2r(*(wsptr+2), mm1);
01426 movq_r2r(mm7, mm4); */
01427
01428 paddw_m2r(*(wsptr), mm7);
01429
01430 psubw_m2r(*(wsptr), mm4);
01431 movq_r2r(mm7, mm0);
01432
01433 psubw_m2r(*(wsptr+2), mm5);
01434 paddw_r2r(mm3, mm7);
01435
01436 psubw_r2r(mm3, mm0);
01437
01438 movq_r2m(mm7, *(wsptr));
01439 movq_r2r(mm1, mm3);
01440
01441 movq_r2m(mm0, *(wsptr+14));
01442 paddw_r2r(mm2, mm1);
01443
01444 psubw_r2r(mm2, mm3);
01445
01446 movq_r2m(mm1, *(wsptr+2));
01447 movq_r2r(mm4, mm1);
01448
01449 movq_r2m(mm3, *(wsptr+12));
01450
01451 paddw_m2r(*(wsptr+4), mm4);
01452
01453 psubw_m2r(*(wsptr+4), mm1);
01454
01455 movq_r2m(mm4, *(wsptr+8));
01456 movq_r2r(mm5, mm7);
01457
01458 paddw_r2r(mm6, mm5);
01459
01460 movq_r2m(mm1, *(wsptr+6));
01461 psubw_r2r(mm6, mm7);
01462
01463 movq_r2m(mm5, *(wsptr+4));
01464
01465 movq_r2m(mm7, *(wsptr+10));
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476 wsptr--;
01477
01478
01479
01480
01481
01482 movq_m2r(*(wsptr), mm0);
01483
01484 movq_m2r(*(wsptr+1), mm1);
01485 movq_r2r(mm0, mm2);
01486
01487 movq_m2r(*(wsptr+2), mm3);
01488 paddw_r2r(mm1, mm0);
01489
01490 movq_m2r(*(wsptr+3), mm4);
01491 psubw_r2r(mm1, mm2);
01492
01493 movq_r2r(mm0, mm6);
01494 movq_r2r(mm3, mm5);
01495
01496 paddw_r2r(mm4, mm3);
01497 movq_r2r(mm2, mm1);
01498
01499 psubw_r2r(mm4, mm5);
01500 punpcklwd_r2r(mm3, mm0);
01501
01502 movq_m2r(*(wsptr+7), mm7);
01503 punpckhwd_r2r(mm3, mm6);
01504
01505 movq_m2r(*(wsptr+4), mm3);
01506 punpckldq_r2r(mm6, mm0);
01507
01508 punpcklwd_r2r(mm5, mm1);
01509 movq_r2r(mm3, mm4);
01510
01511 movq_m2r(*(wsptr+6), mm6);
01512 punpckhwd_r2r(mm5, mm2);
01513
01514 movq_m2r(*(wsptr+5), mm5);
01515 punpckldq_r2r(mm2, mm1);
01516
01517
01518 paddw_r2r(mm5, mm3);
01519 movq_r2r(mm6, mm2);
01520
01521 psubw_r2r(mm5, mm4);
01522 paddw_r2r(mm7, mm6);
01523
01524 movq_r2r(mm3, mm5);
01525 punpcklwd_r2r(mm6, mm3);
01526
01527 psubw_r2r(mm7, mm2);
01528 punpckhwd_r2r(mm6, mm5);
01529
01530 movq_r2r(mm4, mm7);
01531 punpckldq_r2r(mm5, mm3);
01532
01533 punpcklwd_r2r(mm2, mm4);
01534
01535 punpckhwd_r2r(mm2, mm7);
01536
01537 punpckldq_r2r(mm7, mm4);
01538 movq_r2r(mm1, mm6);
01539
01540
01541
01542
01543
01544
01545
01546 movq_r2r(mm0, mm2);
01547 punpckhdq_r2r(mm4, mm6);
01548
01549 punpckldq_r2r(mm4, mm1);
01550 psllw_i2r(2, mm6);
01551
01552 pmulhw_m2r(fix_141, mm6);
01553 punpckldq_r2r(mm3, mm0);
01554
01555 punpckhdq_r2r(mm3, mm2);
01556 movq_r2r(mm0, mm7);
01557
01558
01559
01560 paddw_r2r(mm2, mm0);
01561 psubw_r2r(mm2, mm7);
01562
01563
01564 psubw_r2r(mm2, mm6);
01565
01566
01567 movq_r2r(mm1, mm5);
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577 movq_m2r(*(wsptr), mm3);
01578 paddw_r2r(mm6, mm1);
01579
01580 movq_m2r(*(wsptr+1), mm4);
01581 psubw_r2r(mm6, mm5);
01582
01583 movq_r2r(mm3, mm6);
01584 punpckldq_r2r(mm4, mm3);
01585
01586 punpckhdq_r2r(mm6, mm4);
01587 movq_r2r(mm3, mm2);
01588
01589
01590 movq_r2m(mm0, *(wsptr));
01591 paddw_r2r(mm4, mm2);
01592
01593
01594
01595 movq_m2r(*(wsptr+2), mm6);
01596 psubw_r2r(mm4, mm3);
01597
01598 movq_m2r(*(wsptr+3), mm0);
01599 movq_r2r(mm6, mm4);
01600
01601 movq_r2m(mm1, *(wsptr+1));
01602 punpckldq_r2r(mm0, mm6);
01603
01604 punpckhdq_r2r(mm4, mm0);
01605 movq_r2r(mm6, mm1);
01606
01607
01608 paddw_r2r(mm0, mm6);
01609 movq_r2r(mm2, mm4);
01610
01611
01612 movq_r2m(mm5, *(wsptr+2));
01613 punpcklwd_r2r(mm6, mm2);
01614
01615 psubw_r2r(mm0, mm1);
01616 punpckhwd_r2r(mm6, mm4);
01617
01618 movq_r2r(mm3, mm0);
01619 punpcklwd_r2r(mm1, mm3);
01620
01621 movq_r2m(mm7, *(wsptr+3));
01622 punpckhwd_r2r(mm1, mm0);
01623
01624 movq_m2r(*(wsptr+4), mm6);
01625 punpckhdq_r2r(mm2, mm0);
01626
01627 movq_m2r(*(wsptr+5), mm7);
01628 punpckhdq_r2r(mm4, mm3);
01629
01630 movq_m2r(*(wsptr+6), mm1);
01631 movq_r2r(mm6, mm4);
01632
01633 punpckldq_r2r(mm7, mm6);
01634 movq_r2r(mm1, mm5);
01635
01636 punpckhdq_r2r(mm4, mm7);
01637 movq_r2r(mm6, mm2);
01638
01639 movq_m2r(*(wsptr+7), mm4);
01640 paddw_r2r(mm7, mm6);
01641
01642 psubw_r2r(mm7, mm2);
01643 punpckldq_r2r(mm4, mm1);
01644
01645 punpckhdq_r2r(mm5, mm4);
01646 movq_r2r(mm1, mm7);
01647
01648 paddw_r2r(mm4, mm1);
01649 psubw_r2r(mm4, mm7);
01650
01651 movq_r2r(mm6, mm5);
01652 punpcklwd_r2r(mm1, mm6);
01653
01654 punpckhwd_r2r(mm1, mm5);
01655 movq_r2r(mm2, mm4);
01656
01657 punpcklwd_r2r(mm7, mm2);
01658
01659 punpckhwd_r2r(mm7, mm4);
01660
01661 punpckhdq_r2r(mm6, mm4);
01662
01663 punpckhdq_r2r(mm5, mm2);
01664 movq_r2r(mm0, mm5);
01665
01666 punpckldq_r2r(mm4, mm0);
01667
01668 punpckhdq_r2r(mm4, mm5);
01669 movq_r2r(mm3, mm4);
01670
01671 punpckhdq_r2r(mm2, mm4);
01672 movq_r2r(mm5, mm1);
01673
01674 punpckldq_r2r(mm2, mm3);
01675 */
01676 */
01677 psubw_r2r(mm4, mm1);
01678
01679 paddw_r2r(mm4, mm5);
01680 */
01681 psllw_i2r(2, mm1);
01682
01683 psllw_i2r(2, mm0);
01684
01685 pmulhw_m2r(fix_141, mm1);
01686 */
01687 */
01688 psllw_i2r(2, mm3);
01689 movq_r2r(mm0, mm7);
01690
01691 pmulhw_m2r(fix_n184, mm7);
01692 movq_r2r(mm3, mm6);
01693
01694 movq_m2r(*(wsptr), mm2);
01695
01696 pmulhw_m2r(fix_108n184, mm6);
01697 */
01698 */
01699 movq_r2r(mm2, mm4);
01700
01701 pmulhw_m2r(fix_184n261, mm0);
01702 paddw_r2r(mm5, mm2);
01703
01704 pmulhw_m2r(fix_184, mm3);
01705 psubw_r2r(mm5, mm4);
01706
01707 */
01708 psraw_i2r(3, mm2);
01709
01710 paddw_r2r(mm6, mm7);
01711 psraw_i2r(3, mm4);
01712
01713 paddw_r2r(mm0, mm3);
01714
01715
01716 psubw_r2r(mm5, mm3);
01717
01718
01719 movq_m2r(*(wsptr+1), mm0);
01720 psubw_r2r(mm3, mm1);
01721
01722 movq_r2r(mm0, mm6);
01723 paddw_r2r(mm3, mm0);
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733
01734
01735
01736
01737
01738 psubw_r2r(mm3, mm6);
01739 psraw_i2r(3, mm0);
01740
01741 psraw_i2r(3, mm6);
01742
01743 packuswb_r2r(mm4, mm0);
01744
01745 movq_m2r(*(wsptr+2), mm5);
01746 packuswb_r2r(mm6, mm2);
01747
01748
01749
01750
01751
01752 paddw_r2r(mm1, mm7);
01753 movq_r2r(mm5, mm3);
01754
01755 paddw_r2r(mm1, mm5);
01756 psubw_r2r(mm1, mm3);
01757
01758 psraw_i2r(3, mm5);
01759
01760 movq_m2r(*(wsptr+3), mm4);
01761 psraw_i2r(3, mm3);
01762
01763
01764
01765
01766
01767
01768
01769 movq_r2r(mm4, mm6);
01770 paddw_r2r(mm7, mm4);
01771
01772 psubw_r2r(mm7, mm6);
01773 psraw_i2r(3, mm4);
01774
01775
01776
01777 psraw_i2r(3, mm6);
01778
01779 packuswb_r2r(mm4, mm5);
01780
01781 packuswb_r2r(mm3, mm6);
01782 movq_r2r(mm2, mm4);
01783
01784 movq_r2r(mm5, mm7);
01785 punpcklbw_r2r(mm0, mm2);
01786
01787 punpckhbw_r2r(mm0, mm4);
01788 movq_r2r(mm2, mm1);
01789
01790 punpcklbw_r2r(mm6, mm5);
01791
01792
01793
01794 punpckhbw_r2r(mm6, mm7);
01795
01796 punpcklwd_r2r(mm5, mm2);
01797
01798
01799
01800 movq_r2r(mm7, mm6);
01801 punpckhwd_r2r(mm5, mm1);
01802
01803 movq_r2r(mm2, mm0);
01804 punpcklwd_r2r(mm4, mm6);
01805
01806
01807
01808 punpckldq_r2r(mm6, mm2);
01809
01810
01811
01812 movq_r2r(mm1, mm3);
01813
01814
01815
01816 punpckhwd_r2r(mm4, mm7);
01817
01818 movq_r2m(mm2, *(dataptr));
01819
01820 punpckhdq_r2r(mm6, mm0);
01821
01822 dataptr += rskip;
01823 movq_r2m(mm0, *(dataptr));
01824
01825 punpckldq_r2r(mm7, mm1);
01826 punpckhdq_r2r(mm7, mm3);
01827
01828 dataptr += rskip;
01829 movq_r2m(mm1, *(dataptr));
01830
01831 dataptr += rskip;
01832 movq_r2m(mm3, *(dataptr));
01833
01834
01835
01836 wsptr += 8;
01837
01838
01839
01840
01841
01842
01843
01844 movq_m2r(*(wsptr), mm0);
01845
01846 movq_m2r(*(wsptr+1), mm1);
01847 movq_r2r(mm0, mm2);
01848
01849 movq_m2r(*(wsptr+2), mm3);
01850 paddw_r2r(mm1, mm0);
01851
01852 movq_m2r(*(wsptr+3), mm4);
01853 psubw_r2r(mm1, mm2);
01854
01855 movq_r2r(mm0, mm6);
01856 movq_r2r(mm3, mm5);
01857
01858 paddw_r2r(mm4, mm3);
01859 movq_r2r(mm2, mm1);
01860
01861 psubw_r2r(mm4, mm5);
01862 punpcklwd_r2r(mm3, mm0);
01863
01864 movq_m2r(*(wsptr+7), mm7);
01865 punpckhwd_r2r(mm3, mm6);
01866
01867 movq_m2r(*(wsptr+4), mm3);
01868 punpckldq_r2r(mm6, mm0);
01869
01870 punpcklwd_r2r(mm5, mm1);
01871 movq_r2r(mm3, mm4);
01872
01873 movq_m2r(*(wsptr+6), mm6);
01874 punpckhwd_r2r(mm5, mm2);
01875
01876 movq_m2r(*(wsptr+5), mm5);
01877 punpckldq_r2r(mm2, mm1);
01878
01879 paddw_r2r(mm5, mm3);
01880 movq_r2r(mm6, mm2);
01881
01882 psubw_r2r(mm5, mm4);
01883 paddw_r2r(mm7, mm6);
01884
01885 movq_r2r(mm3, mm5);
01886 punpcklwd_r2r(mm6, mm3);
01887
01888 psubw_r2r(mm7, mm2);
01889 punpckhwd_r2r(mm6, mm5);
01890
01891 movq_r2r(mm4, mm7);
01892 punpckldq_r2r(mm5, mm3);
01893
01894 punpcklwd_r2r(mm2, mm4);
01895
01896 punpckhwd_r2r(mm2, mm7);
01897
01898 punpckldq_r2r(mm7, mm4);
01899 movq_r2r(mm1, mm6);
01900
01901
01902
01903
01904
01905
01906 movq_r2r(mm0, mm2);
01907 punpckhdq_r2r(mm4, mm6);
01908
01909 punpckldq_r2r(mm4, mm1);
01910 psllw_i2r(2, mm6);
01911
01912 pmulhw_m2r(fix_141, mm6);
01913 punpckldq_r2r(mm3, mm0);
01914
01915 punpckhdq_r2r(mm3, mm2);
01916 movq_r2r(mm0, mm7);
01917
01918
01919
01920 paddw_r2r(mm2, mm0);
01921 psubw_r2r(mm2, mm7);
01922
01923
01924 psubw_r2r(mm2, mm6);
01925
01926
01927 movq_r2r(mm1, mm5);
01928
01929
01930
01931
01932
01933
01934
01935
01936
01937
01938 movq_m2r(*(wsptr), mm3);
01939 paddw_r2r(mm6, mm1);
01940
01941 movq_m2r(*(wsptr+1), mm4);
01942 psubw_r2r(mm6, mm5);
01943
01944 movq_r2r(mm3, mm6);
01945 punpckldq_r2r(mm4, mm3);
01946
01947 punpckhdq_r2r(mm6, mm4);
01948 movq_r2r(mm3, mm2);
01949
01950
01951 movq_r2m(mm0, *(wsptr));
01952 paddw_r2r(mm4, mm2);
01953
01954
01955
01956 movq_m2r(*(wsptr+2), mm6);
01957 psubw_r2r(mm4, mm3);
01958
01959 movq_m2r(*(wsptr+3), mm0);
01960 movq_r2r(mm6, mm4);
01961
01962 movq_r2m(mm1, *(wsptr+1));
01963 punpckldq_r2r(mm0, mm6);
01964
01965 punpckhdq_r2r(mm4, mm0);
01966 movq_r2r(mm6, mm1);
01967
01968
01969 paddw_r2r(mm0, mm6);
01970 movq_r2r(mm2, mm4);
01971
01972
01973 movq_r2m(mm5, *(wsptr+2));
01974 punpcklwd_r2r(mm6, mm2);
01975
01976 psubw_r2r(mm0, mm1);
01977 punpckhwd_r2r(mm6, mm4);
01978
01979 movq_r2r(mm3, mm0);
01980 punpcklwd_r2r(mm1, mm3);
01981
01982 movq_r2m(mm7, *(wsptr+3));
01983 punpckhwd_r2r(mm1, mm0);
01984
01985 movq_m2r(*(wsptr+4), mm6);
01986 punpckhdq_r2r(mm2, mm0);
01987
01988 movq_m2r(*(wsptr+5), mm7);
01989 punpckhdq_r2r(mm4, mm3);
01990
01991 movq_m2r(*(wsptr+6), mm1);
01992 movq_r2r(mm6, mm4);
01993
01994 punpckldq_r2r(mm7, mm6);
01995 movq_r2r(mm1, mm5);
01996
01997 punpckhdq_r2r(mm4, mm7);
01998 movq_r2r(mm6, mm2);
01999
02000 movq_m2r(*(wsptr+7), mm4);
02001 paddw_r2r(mm7, mm6);
02002
02003 psubw_r2r(mm7, mm2);
02004 punpckldq_r2r(mm4, mm1);
02005
02006 punpckhdq_r2r(mm5, mm4);
02007 movq_r2r(mm1, mm7);
02008
02009 paddw_r2r(mm4, mm1);
02010 psubw_r2r(mm4, mm7);
02011
02012 movq_r2r(mm6, mm5);
02013 punpcklwd_r2r(mm1, mm6);
02014
02015 punpckhwd_r2r(mm1, mm5);
02016 movq_r2r(mm2, mm4);
02017
02018 punpcklwd_r2r(mm7, mm2);
02019
02020 punpckhwd_r2r(mm7, mm4);
02021
02022 punpckhdq_r2r(mm6, mm4);
02023
02024 punpckhdq_r2r(mm5, mm2);
02025 movq_r2r(mm0, mm5);
02026
02027 punpckldq_r2r(mm4, mm0);
02028
02029 punpckhdq_r2r(mm4, mm5);
02030 movq_r2r(mm3, mm4);
02031
02032 punpckhdq_r2r(mm2, mm4);
02033 movq_r2r(mm5, mm1);
02034
02035 punpckldq_r2r(mm2, mm3);
02036 */
02037 */
02038 psubw_r2r(mm4, mm1);
02039
02040 paddw_r2r(mm4, mm5);
02041 */
02042 psllw_i2r(2, mm1);
02043
02044 psllw_i2r(2, mm0);
02045
02046 pmulhw_m2r(fix_141, mm1);
02047 */
02048 */
02049 psllw_i2r(2, mm3);
02050 movq_r2r(mm0, mm7);
02051
02052 pmulhw_m2r(fix_n184, mm7);
02053 movq_r2r(mm3, mm6);
02054
02055 movq_m2r(*(wsptr), mm2);
02056
02057 pmulhw_m2r(fix_108n184, mm6);
02058 */
02059 */
02060 movq_r2r(mm2, mm4);
02061
02062 pmulhw_m2r(fix_184n261, mm0);
02063 paddw_r2r(mm5, mm2);
02064
02065 pmulhw_m2r(fix_184, mm3);
02066 psubw_r2r(mm5, mm4);
02067
02068 */
02069 psraw_i2r(3, mm2);
02070
02071 paddw_r2r(mm6, mm7);
02072 psraw_i2r(3, mm4);
02073
02074 paddw_r2r(mm0, mm3);
02075
02076
02077 psubw_r2r(mm5, mm3);
02078
02079
02080 movq_m2r(*(wsptr+1), mm0);
02081 psubw_r2r(mm3, mm1);
02082
02083 movq_r2r(mm0, mm6);
02084 paddw_r2r(mm3, mm0);
02085
02086
02087
02088
02089
02090
02091
02092
02093
02094
02095
02096
02097
02098 psubw_r2r(mm3, mm6);
02099 psraw_i2r(3, mm0);
02100
02101 psraw_i2r(3, mm6);
02102
02103 packuswb_r2r(mm4, mm0);
02104
02105 movq_m2r(*(wsptr+2), mm5);
02106 packuswb_r2r(mm6, mm2);
02107
02108
02109
02110
02111
02112 paddw_r2r(mm1, mm7);
02113 movq_r2r(mm5, mm3);
02114
02115 paddw_r2r(mm1, mm5);
02116 psubw_r2r(mm1, mm3);
02117
02118 psraw_i2r(3, mm5);
02119
02120 movq_m2r(*(wsptr+3), mm4);
02121 psraw_i2r(3, mm3);
02122
02123
02124
02125
02126
02127
02128
02129 movq_r2r(mm4, mm6);
02130 paddw_r2r(mm7, mm4);
02131
02132 psubw_r2r(mm7, mm6);
02133 psraw_i2r(3, mm4);
02134
02135 psraw_i2r(3, mm6);
02136
02137
02138
02139
02140
02141
02142
02143
02144
02145 packuswb_r2r(mm4, mm5);
02146
02147 packuswb_r2r(mm3, mm6);
02148 movq_r2r(mm2, mm4);
02149
02150 movq_r2r(mm5, mm7);
02151 punpcklbw_r2r(mm0, mm2);
02152
02153 punpckhbw_r2r(mm0, mm4);
02154 movq_r2r(mm2, mm1);
02155
02156 punpcklbw_r2r(mm6, mm5);
02157
02158 punpckhbw_r2r(mm6, mm7);
02159
02160 punpcklwd_r2r(mm5, mm2);
02161
02162 movq_r2r(mm7, mm6);
02163 punpckhwd_r2r(mm5, mm1);
02164
02165 movq_r2r(mm2, mm0);
02166 punpcklwd_r2r(mm4, mm6);
02167
02168 punpckldq_r2r(mm6, mm2);
02169
02170 movq_r2r(mm1, mm3);
02171
02172 punpckhwd_r2r(mm4, mm7);
02173
02174 dataptr += rskip;
02175 movq_r2m(mm2, *(dataptr));
02176
02177 punpckhdq_r2r(mm6, mm0);
02178
02179 dataptr += rskip;
02180 movq_r2m(mm0, *(dataptr));
02181
02182 punpckldq_r2r(mm7, mm1);
02183
02184 punpckhdq_r2r(mm7, mm3);
02185
02186 dataptr += rskip;
02187 movq_r2m(mm1, *(dataptr));
02188
02189 dataptr += rskip;
02190 movq_r2m(mm3, *(dataptr));
02191
02192 #else
02193 __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
02194 __s32 tmp10, tmp11, tmp12, tmp13;
02195 __s32 z5, z10, z11, z12, z13;
02196 __s16 *inptr;
02197 __s32 *wsptr;
02198 __u8 *outptr;
02199 int ctr;
02200 __s32 dcval;
02201 __s32 workspace[64];
02202
02203 inptr = data;
02204 wsptr = workspace;
02205 for (ctr = 8; ctr > 0; ctr--) {
02206
02207 if ((inptr[8] | inptr[16] | inptr[24] |
02208 inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
02209 dcval = inptr[0];
02210 wsptr[0] = dcval;
02211 wsptr[8] = dcval;
02212 wsptr[16] = dcval;
02213 wsptr[24] = dcval;
02214 wsptr[32] = dcval;
02215 wsptr[40] = dcval;
02216 wsptr[48] = dcval;
02217 wsptr[56] = dcval;
02218
02219 inptr++;
02220 wsptr++;
02221 continue;
02222 }
02223
02224 tmp0 = inptr[0];
02225 tmp1 = inptr[16];
02226 tmp2 = inptr[32];
02227 tmp3 = inptr[48];
02228
02229 tmp10 = tmp0 + tmp2;
02230 tmp11 = tmp0 - tmp2;
02231
02232 tmp13 = tmp1 + tmp3;
02233 tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
02234
02235 tmp0 = tmp10 + tmp13;
02236 tmp3 = tmp10 - tmp13;
02237 tmp1 = tmp11 + tmp12;
02238 tmp2 = tmp11 - tmp12;
02239
02240 tmp4 = inptr[8];
02241 tmp5 = inptr[24];
02242 tmp6 = inptr[40];
02243 tmp7 = inptr[56];
02244
02245 z13 = tmp6 + tmp5;
02246 z10 = tmp6 - tmp5;
02247 z11 = tmp4 + tmp7;
02248 z12 = tmp4 - tmp7;
02249
02250 tmp7 = z11 + z13;
02251 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
02252
02253 z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
02254 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
02255 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
02256
02257 tmp6 = tmp12 - tmp7;
02258 tmp5 = tmp11 - tmp6;
02259 tmp4 = tmp10 + tmp5;
02260
02261 wsptr[0] = (__s32) (tmp0 + tmp7);
02262 wsptr[56] = (__s32) (tmp0 - tmp7);
02263 wsptr[8] = (__s32) (tmp1 + tmp6);
02264 wsptr[48] = (__s32) (tmp1 - tmp6);
02265 wsptr[16] = (__s32) (tmp2 + tmp5);
02266 wsptr[40] = (__s32) (tmp2 - tmp5);
02267 wsptr[32] = (__s32) (tmp3 + tmp4);
02268 wsptr[24] = (__s32) (tmp3 - tmp4);
02269
02270 inptr++;
02271 wsptr++;
02272 }
02273
02274 wsptr = workspace;
02275 for (ctr = 0; ctr < 8; ctr++) {
02276 outptr = &(odata[ctr*rskip]);
02277
02278 tmp10 = wsptr[0] + wsptr[4];
02279 tmp11 = wsptr[0] - wsptr[4];
02280
02281 tmp13 = wsptr[2] + wsptr[6];
02282 tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
02283
02284 tmp0 = tmp10 + tmp13;
02285 tmp3 = tmp10 - tmp13;
02286 tmp1 = tmp11 + tmp12;
02287 tmp2 = tmp11 - tmp12;
02288
02289 z13 = wsptr[5] + wsptr[3];
02290 z10 = wsptr[5] - wsptr[3];
02291 z11 = wsptr[1] + wsptr[7];
02292 z12 = wsptr[1] - wsptr[7];
02293
02294 tmp7 = z11 + z13;
02295 tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
02296
02297 z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
02298 tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
02299 tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
02300
02301 tmp6 = tmp12 - tmp7;
02302 tmp5 = tmp11 - tmp6;
02303 tmp4 = tmp10 + tmp5;
02304
02305 outptr[0] = RL(DESCALE(tmp0 + tmp7));
02306 outptr[7] = RL(DESCALE(tmp0 - tmp7));
02307 outptr[1] = RL(DESCALE(tmp1 + tmp6));
02308 outptr[6] = RL(DESCALE(tmp1 - tmp6));
02309 outptr[2] = RL(DESCALE(tmp2 + tmp5));
02310 outptr[5] = RL(DESCALE(tmp2 - tmp5));
02311 outptr[4] = RL(DESCALE(tmp3 + tmp4));
02312 outptr[3] = RL(DESCALE(tmp3 - tmp4));
02313
02314 wsptr += 8;
02315 }
02316 #endif
02317 }
02318
02319
02320
02321
02322
02323
02324
02325
02326
02327
02328
02329
02330
02331
02332
02333
02334
02335
02336 void RTjpeg_init_data(void)
02337 {
02338 unsigned long dptr;
02339
02340 dptr=(unsigned long)&(RTjpeg_alldata[0]);
02341 dptr+=32;
02342 dptr=dptr>>5;
02343 dptr=dptr<<5;
02344
02345 RTjpeg_block=(__s16 *)dptr;
02346 dptr+=sizeof(__s16)*64;
02347 RTjpeg_lqt=(__s32 *)dptr;
02348 dptr+=sizeof(__s32)*64;
02349 RTjpeg_cqt=(__s32 *)dptr;
02350 dptr+=sizeof(__s32)*64;
02351 RTjpeg_liqt=(__u32 *)dptr;
02352 dptr+=sizeof(__u32)*64;
02353 RTjpeg_ciqt=(__u32 *)dptr;
02354 }
02355
02356
02357
02358
02359
02360
02361
02362
02363
02364
02365
02366
02367 void RTjpeg_init_Q(__u8 Q)
02368 {
02369 int i;
02370 __u64 qual;
02371
02372 qual=(__u64)Q<<(32-7);
02373
02374 for(i=0; i<64; i++)
02375 {
02376 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
02377 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
02378 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
02379 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
02380 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
02381 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
02382 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
02383 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
02384 }
02385
02386 RTjpeg_lb8=0;
02387 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
02388 RTjpeg_lb8--;
02389 RTjpeg_cb8=0;
02390 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
02391 RTjpeg_cb8--;
02392
02393 RTjpeg_dct_init();
02394 RTjpeg_idct_init();
02395 RTjpeg_quant_init();
02396 }
02397
02398
02399
02400
02401
02402
02403
02404
02405
02406
02407
02408
02409
02410
02411
02412 void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q)
02413 {
02414 int i;
02415 __u64 qual;
02416
02417 RTjpeg_init_data();
02418
02419 RTjpeg_width=width;
02420 RTjpeg_height=height;
02421 RTjpeg_Ywidth = RTjpeg_width>>3;
02422 RTjpeg_Ysize=width * height;
02423 RTjpeg_Cwidth = RTjpeg_width>>4;
02424 RTjpeg_Csize= (width>>1) * height;
02425
02426 qual=(__u64)Q<<(32-7);
02427
02428 for(i=0; i<64; i++)
02429 {
02430 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3);
02431 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1;
02432 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3);
02433 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1;
02434 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3);
02435 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3);
02436 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3;
02437 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3;
02438 }
02439
02440 RTjpeg_lb8=0;
02441 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
02442 RTjpeg_lb8--;
02443 RTjpeg_cb8=0;
02444 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
02445 RTjpeg_cb8--;
02446
02447 RTjpeg_dct_init();
02448 RTjpeg_quant_init();
02449
02450 for(i=0; i<64; i++)
02451 buf[i]=RTjpeg_liqt[i];
02452 for(i=0; i<64; i++)
02453 buf[64+i]=RTjpeg_ciqt[i];
02454 }
02455
02456 void RTjpeg_init_decompress(__u32 *buf, int width, int height)
02457 {
02458 int i;
02459
02460 RTjpeg_init_data();
02461
02462 RTjpeg_width=width;
02463 RTjpeg_height=height;
02464 RTjpeg_Ywidth = RTjpeg_width>>3;
02465 RTjpeg_Ysize=width * height;
02466 RTjpeg_Cwidth = RTjpeg_width>>4;
02467 RTjpeg_Csize= (width>>1) * height;
02468
02469 for(i=0; i<64; i++)
02470 {
02471 RTjpeg_liqt[i]=buf[i];
02472 RTjpeg_ciqt[i]=buf[i+64];
02473 }
02474
02475 RTjpeg_lb8=0;
02476 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
02477 RTjpeg_lb8--;
02478 RTjpeg_cb8=0;
02479 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
02480 RTjpeg_cb8--;
02481
02482 RTjpeg_idct_init();
02483
02484
02485 }
02486
02487 int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
02488 {
02489 __s8 * sb;
02490 register __s8 * bp1 = bp + (RTjpeg_width<<3);
02491 register __s8 * bp2 = bp + RTjpeg_Ysize;
02492 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
02493 register int i, j, k;
02494
02495 #ifdef USE_MMX
02496 emms();
02497 #endif
02498 sb=sp;
02499
02500 for(i=RTjpeg_height>>1; i; i-=8)
02501 {
02502 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
02503 {
02504 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
02505 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02506 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02507
02508 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
02509 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02510 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02511
02512 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
02513 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02514 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02515
02516 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
02517 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02518 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02519
02520 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
02521 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02522 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02523
02524 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
02525 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02526 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02527
02528 }
02529 bp+=RTjpeg_width<<4;
02530 bp1+=RTjpeg_width<<4;
02531 bp2+=RTjpeg_width<<2;
02532 bp3+=RTjpeg_width<<2;
02533
02534 }
02535 #ifdef USE_MMX
02536 emms();
02537 #endif
02538 return (sp-sb);
02539 }
02540
02541 int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp)
02542 {
02543 __s8 * sb;
02544 register __s8 * bp2 = bp + RTjpeg_Ysize;
02545 register __s8 * bp3 = bp2 + RTjpeg_Csize;
02546 register int i, j, k;
02547
02548 #ifdef USE_MMX
02549 emms();
02550 #endif
02551 sb=sp;
02552
02553 for(i=RTjpeg_height; i; i-=8)
02554 {
02555 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
02556 {
02557 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
02558 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02559 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02560
02561 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
02562 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02563 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02564
02565 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
02566 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02567 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02568
02569 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
02570 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02571 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02572
02573 }
02574 bp+=RTjpeg_width<<3;
02575 bp2+=RTjpeg_width<<2;
02576 bp3+=RTjpeg_width<<2;
02577
02578 }
02579 #ifdef USE_MMX
02580 emms();
02581 #endif
02582 return (sp-sb);
02583 }
02584
02585 int RTjpeg_compress8(__s8 *sp, unsigned char *bp)
02586 {
02587 __s8 * sb;
02588 int i, j;
02589
02590 #ifdef USE_MMX
02591 emms();
02592 #endif
02593
02594 sb=sp;
02595
02596 for(i=0; i<RTjpeg_height; i+=8)
02597 {
02598 for(j=0; j<RTjpeg_width; j+=8)
02599 {
02600 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
02601 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02602 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02603 }
02604 bp+=RTjpeg_width;
02605 }
02606
02607 #ifdef USE_MMX
02608 emms();
02609 #endif
02610 return (sp-sb);
02611 }
02612
02613 void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp)
02614 {
02615 register __s8 * bp2 = bp + RTjpeg_Ysize;
02616 register __s8 * bp3 = bp2 + (RTjpeg_Csize);
02617 int i, j,k;
02618
02619 #ifdef USE_MMX
02620 emms();
02621 #endif
02622
02623
02624 for(i=RTjpeg_height; i; i-=8)
02625 {
02626 for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
02627 if(*sp==-1)sp++;
02628 else
02629 {
02630 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02631 RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
02632 }
02633 if(*sp==-1)sp++;
02634 else
02635 {
02636 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02637 RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
02638 }
02639 if(*sp==-1)sp++;
02640 else
02641 {
02642 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
02643 RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
02644 }
02645 if(*sp==-1)sp++;
02646 else
02647 {
02648 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
02649 RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
02650 }
02651 }
02652 bp+=RTjpeg_width<<3;
02653 bp2+=RTjpeg_width<<2;
02654 bp3+=RTjpeg_width<<2;
02655 }
02656 #ifdef USE_MMX
02657 emms();
02658 #endif
02659 }
02660
02661 void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp)
02662 {
02663 register __s8 * bp1 = bp + (RTjpeg_width<<3);
02664 register __s8 * bp2 = bp + RTjpeg_Ysize;
02665 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
02666 int i, j,k;
02667
02668 #ifdef USE_MMX
02669 emms();
02670 #endif
02671
02672
02673 for(i=RTjpeg_height>>1; i; i-=8)
02674 {
02675 for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
02676 if(*sp==-1)sp++;
02677 else
02678 {
02679 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02680 RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
02681 }
02682 if(*sp==-1)sp++;
02683 else
02684 {
02685 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02686 RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
02687 }
02688 if(*sp==-1)sp++;
02689 else
02690 {
02691 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02692 RTjpeg_idct(bp1+j, RTjpeg_block, RTjpeg_width);
02693 }
02694 if(*sp==-1)sp++;
02695 else
02696 {
02697 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02698 RTjpeg_idct(bp1+j+8, RTjpeg_block, RTjpeg_width);
02699 }
02700 if(*sp==-1)sp++;
02701 else
02702 {
02703 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
02704 RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
02705 }
02706 if(*sp==-1)sp++;
02707 else
02708 {
02709 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
02710 RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
02711 }
02712 }
02713 bp+=RTjpeg_width<<4;
02714 bp1+=RTjpeg_width<<4;
02715 bp2+=RTjpeg_width<<2;
02716 bp3+=RTjpeg_width<<2;
02717 }
02718 #ifdef USE_MMX
02719 emms();
02720 #endif
02721 }
02722
02723 void RTjpeg_decompress8(__s8 *sp, __u8 *bp)
02724 {
02725 int i, j;
02726
02727 #ifdef USE_MMX
02728 emms();
02729 #endif
02730
02731
02732 for(i=0; i<RTjpeg_height; i+=8)
02733 {
02734 for(j=0; j<RTjpeg_width; j+=8)
02735 if(*sp==-1)sp++;
02736 else
02737 {
02738 sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
02739 RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
02740 }
02741 bp+=RTjpeg_width<<3;
02742 }
02743 }
02744
02745
02746
02747
02748
02749
02750
02751
02752 void RTjpeg_init_mcompress(void)
02753 {
02754 unsigned long tmp;
02755
02756 if(!RTjpeg_old)
02757 {
02758 RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32);
02759 tmp=(unsigned long)RTjpeg_old;
02760 tmp+=32;
02761 tmp=tmp>>5;
02762 RTjpeg_old=(__s16 *)(tmp<<5);
02763 }
02764 if (!RTjpeg_old)
02765 {
02766 fprintf(stderr, "RTjpeg: Could not allocate memory\n");
02767 exit(-1);
02768 }
02769 bzero(RTjpeg_old, ((4*RTjpeg_width*RTjpeg_height)));
02770 }
02771
02772 #ifdef USE_MMX
02773
02774 int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
02775 {
02776 int i;
02777 mmx_t *mold=(mmx_t *)old;
02778 mmx_t *mblock=(mmx_t *)RTjpeg_block;
02779 mmx_t result;
02780 static mmx_t neg=(mmx_t)(unsigned long long)0xffffffffffffffffULL;
02781
02782 movq_m2r(*mask, mm7);
02783 movq_m2r(neg, mm6);
02784 pxor_r2r(mm5, mm5);
02785
02786 for(i=0; i<8; i++)
02787 {
02788 movq_m2r(*(mblock++), mm0);
02789 movq_m2r(*(mblock++), mm2);
02790 movq_m2r(*(mold++), mm1);
02791 movq_m2r(*(mold++), mm3);
02792 psubsw_r2r(mm1, mm0);
02793 psubsw_r2r(mm3, mm2);
02794 movq_r2r(mm0, mm1);
02795 movq_r2r(mm2, mm3);
02796 pcmpgtw_r2r(mm7, mm0);
02797 pcmpgtw_r2r(mm7, mm2);
02798 pxor_r2r(mm6, mm1);
02799 pxor_r2r(mm6, mm3);
02800 pcmpgtw_r2r(mm7, mm1);
02801 pcmpgtw_r2r(mm7, mm3);
02802 por_r2r(mm0, mm5);
02803 por_r2r(mm2, mm5);
02804 por_r2r(mm1, mm5);
02805 por_r2r(mm3, mm5);
02806 }
02807 movq_r2m(mm5, result);
02808
02809 if(result.q)
02810 {
02811 if(!RTjpeg_mtest)
02812 for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
02813 return 0;
02814 }
02815
02816 return 1;
02817 }
02818
02819 #else
02820 int RTjpeg_bcomp(__s16 *old, __u16 *mask)
02821 {
02822 int i;
02823
02824 for(i=0; i<64; i++)
02825 if(abs(old[i]-RTjpeg_block[i])>*mask)
02826 {
02827 if(!RTjpeg_mtest)
02828 for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
02829 return 0;
02830 }
02831 return 1;
02832 }
02833 #endif
02834
02835 void RTjpeg_set_test(int i)
02836 {
02837 RTjpeg_mtest=i;
02838 }
02839
02840 int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
02841 {
02842 __s8 * sb;
02843 __s16 *block;
02844 register __s8 * bp1 = bp + (RTjpeg_width<<3);
02845 register __s8 * bp2 = bp + RTjpeg_Ysize;
02846 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
02847 register int i, j, k;
02848
02849 #ifdef USE_MMX
02850 emms();
02851 RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
02852 RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
02853 #else
02854 RTjpeg_lmask=lmask;
02855 RTjpeg_cmask=cmask;
02856 #endif
02857
02858 sb=sp;
02859 block=RTjpeg_old;
02860
02861 for(i=RTjpeg_height>>1; i; i-=8)
02862 {
02863 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
02864 {
02865 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
02866 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02867 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02868 {
02869 *((__u8 *)sp++)=255;
02870 }
02871 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02872 block+=64;
02873
02874 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
02875 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02876 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02877 {
02878 *((__u8 *)sp++)=255;
02879 }
02880 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02881 block+=64;
02882
02883 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth);
02884 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02885 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02886 {
02887 *((__u8 *)sp++)=255;
02888 }
02889 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02890 block+=64;
02891
02892 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth);
02893 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02894 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02895 {
02896 *((__u8 *)sp++)=255;
02897 }
02898 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02899 block+=64;
02900
02901 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
02902 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02903 if(RTjpeg_bcomp(block, &RTjpeg_cmask))
02904 {
02905 *((__u8 *)sp++)=255;
02906 }
02907 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02908 block+=64;
02909
02910 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
02911 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02912 if(RTjpeg_bcomp(block, &RTjpeg_cmask))
02913 {
02914 *((__u8 *)sp++)=255;
02915 }
02916 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02917 block+=64;
02918 }
02919 bp+=RTjpeg_width<<4;
02920 bp1+=RTjpeg_width<<4;
02921 bp2+=RTjpeg_width<<2;
02922 bp3+=RTjpeg_width<<2;
02923
02924 }
02925 #ifdef USE_MMX
02926 emms();
02927 #endif
02928 return (sp-sb);
02929 }
02930
02931
02932 int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
02933 {
02934 __s8 * sb;
02935 __s16 *block;
02936 register __s8 * bp2;
02937 register __s8 * bp3;
02938 register int i, j, k;
02939
02940 #ifdef USE_MMX
02941 emms();
02942 RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
02943 RTjpeg_cmask=(mmx_t)(((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask);
02944 #else
02945 RTjpeg_lmask=lmask;
02946 RTjpeg_cmask=cmask;
02947 #endif
02948
02949 bp = bp - RTjpeg_width*0;
02950 bp2 = bp + RTjpeg_Ysize-RTjpeg_width*0;
02951 bp3 = bp2 + RTjpeg_Csize;
02952
02953 sb=sp;
02954 block=RTjpeg_old;
02955
02956 for(i=RTjpeg_height; i; i-=8)
02957 {
02958 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
02959 {
02960 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
02961 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02962 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02963 {
02964 *((__u8 *)sp++)=255;
02965 }
02966 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02967 block+=64;
02968
02969 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
02970 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
02971 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
02972 {
02973 *((__u8 *)sp++)=255;
02974 }
02975 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
02976 block+=64;
02977
02978 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
02979 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02980 if(RTjpeg_bcomp(block, &RTjpeg_cmask))
02981 {
02982 *((__u8 *)sp++)=255;
02983 }
02984 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02985 block+=64;
02986
02987 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
02988 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
02989 if(RTjpeg_bcomp(block, &RTjpeg_cmask))
02990 {
02991 *((__u8 *)sp++)=255;
02992 }
02993 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
02994 block+=64;
02995
02996 }
02997 bp+=RTjpeg_width<<3;
02998 bp2+=RTjpeg_width<<2;
02999 bp3+=RTjpeg_width<<2;
03000 }
03001 printf ("%d\n", block - RTjpeg_old);
03002 #ifdef USE_MMX
03003 emms();
03004 #endif
03005 return (sp-sb);
03006 }
03007
03008 int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
03009 {
03010 __s8 * sb;
03011 __s16 *block;
03012 int i, j;
03013
03014 #ifdef USE_MMX
03015 emms();
03016 RTjpeg_lmask=(mmx_t)(((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask);
03017 #else
03018 RTjpeg_lmask=lmask;
03019 #endif
03020
03021
03022 sb=sp;
03023 block=RTjpeg_old;
03024
03025 for(i=0; i<RTjpeg_height; i+=8)
03026 {
03027 for(j=0; j<RTjpeg_width; j+=8)
03028 {
03029 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
03030 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
03031 if(RTjpeg_bcomp(block, &RTjpeg_lmask))
03032 {
03033 *((__u8 *)sp++)=255;
03034
03035 } else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
03036 block+=64;
03037 }
03038 bp+=RTjpeg_width<<3;
03039 }
03040 #ifdef USE_MMX
03041 emms();
03042 #endif
03043 return (sp-sb);
03044 }
03045
/*
 * RTjpeg_color_init - colour-conversion setup hook.
 *
 * Currently does nothing; the function is kept so existing callers still
 * link.
 */
void RTjpeg_color_init(void)
{
    /* intentionally empty */
}
03049
/*
 * YCbCr -> RGB conversion coefficients, scaled by 65536 (the converters
 * shift the products right by 16).  They look like the usual studio-range
 * BT.601 factors - TODO confirm.
 */
#define KcrR  76284     /* Cr contribution to R: ~1.164 * 65536 */
#define KcrG  53281     /* Cr contribution to G: ~0.813 * 65536 */
#define KcbG  25625     /* Cb contribution to G: ~0.391 * 65536 */
#define KcbB 132252     /* Cb contribution to B: ~2.018 * 65536 */
#define Ky    76284     /* luma gain:            ~1.164 * 65536 */
03055
03056 void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride)
03057 {
03058 int tmp;
03059 int i, j;
03060 __s32 y, crR, crG, cbG, cbB;
03061 __u8 *bufcr, *bufcb, *bufy, *bufoute;
03062 int yskip;
03063
03064 yskip=RTjpeg_width;
03065
03066 bufcb=&buf[RTjpeg_width*RTjpeg_height];
03067 bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
03068 bufy=&buf[0];
03069 bufoute=rgb;
03070
03071 for(i=0; i<(RTjpeg_height); i++)
03072 {
03073 for(j=0; j<RTjpeg_width; j+=2)
03074 {
03075 crR=(*bufcr-128)*KcrR;
03076 crG=(*(bufcr++)-128)*KcrG;
03077 cbG=(*bufcb-128)*KcbG;
03078 cbB=(*(bufcb++)-128)*KcbB;
03079
03080 y=(bufy[j]-16)*Ky;
03081
03082 tmp=(y+crR)>>16;
03083 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03084 tmp=(y-crG-cbG)>>16;
03085 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03086 tmp=(y+cbB)>>16;
03087 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03088
03089 y=(bufy[j+1]-16)*Ky;
03090
03091 tmp=(y+crR)>>16;
03092 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03093 tmp=(y-crG-cbG)>>16;
03094 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03095 tmp=(y+cbB)>>16;
03096 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03097
03098 }
03099 bufy+=yskip;
03100 }
03101 }
03102
03103
03104 void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride)
03105 {
03106 int tmp;
03107 int i, j;
03108 __s32 y, crR, crG, cbG, cbB;
03109 __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
03110 int oskip, yskip;
03111
03112 if(stride==0)
03113 oskip=RTjpeg_width*3;
03114 else
03115 oskip=2*stride-RTjpeg_width*3;
03116
03117 yskip=RTjpeg_width;
03118
03119 bufcb=&buf[RTjpeg_width*RTjpeg_height];
03120 bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
03121 bufy=&buf[0];
03122 bufoute=rgb;
03123 bufouto=rgb+RTjpeg_width*3;
03124
03125 for(i=0; i<(RTjpeg_height>>1); i++)
03126 {
03127 for(j=0; j<RTjpeg_width; j+=2)
03128 {
03129 crR=(*bufcr-128)*KcrR;
03130 crG=(*(bufcr++)-128)*KcrG;
03131 cbG=(*bufcb-128)*KcbG;
03132 cbB=(*(bufcb++)-128)*KcbB;
03133
03134 y=(bufy[j]-16)*Ky;
03135
03136 tmp=(y+crR)>>16;
03137 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03138 tmp=(y-crG-cbG)>>16;
03139 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03140 tmp=(y+cbB)>>16;
03141 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03142
03143 y=(bufy[j+1]-16)*Ky;
03144
03145 tmp=(y+crR)>>16;
03146 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03147 tmp=(y-crG-cbG)>>16;
03148 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03149 tmp=(y+cbB)>>16;
03150 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03151
03152 y=(bufy[j+yskip]-16)*Ky;
03153
03154 tmp=(y+crR)>>16;
03155 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03156 tmp=(y-crG-cbG)>>16;
03157 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03158 tmp=(y+cbB)>>16;
03159 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03160
03161 y=(bufy[j+1+yskip]-16)*Ky;
03162
03163 tmp=(y+crR)>>16;
03164 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03165 tmp=(y-crG-cbG)>>16;
03166 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03167 tmp=(y+cbB)>>16;
03168 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03169
03170 }
03171 bufoute+=oskip;
03172 bufouto+=oskip;
03173 bufy+=yskip<<1;
03174 }
03175 }
03176
03177
03178 void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride)
03179 {
03180 int tmp;
03181 int i, j;
03182 __s32 y, crR, crG, cbG, cbB;
03183 __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
03184 int oskip, yskip;
03185
03186 if(stride==0)
03187 oskip=RTjpeg_width*4;
03188 else
03189 oskip = 2*stride-RTjpeg_width*4;
03190 yskip=RTjpeg_width;
03191
03192 bufcb=&buf[RTjpeg_width*RTjpeg_height];
03193 bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
03194 bufy=&buf[0];
03195 bufoute=rgb;
03196 bufouto=rgb+RTjpeg_width*4;
03197
03198 for(i=0; i<(RTjpeg_height>>1); i++)
03199 {
03200 for(j=0; j<RTjpeg_width; j+=2)
03201 {
03202 crR=(*bufcr-128)*KcrR;
03203 crG=(*(bufcr++)-128)*KcrG;
03204 cbG=(*bufcb-128)*KcbG;
03205 cbB=(*(bufcb++)-128)*KcbB;
03206
03207 y=(bufy[j]-16)*Ky;
03208
03209 tmp=(y+cbB)>>16;
03210 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03211 tmp=(y-crG-cbG)>>16;
03212 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03213 tmp=(y+crR)>>16;
03214 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03215 bufoute++;
03216
03217 y=(bufy[j+1]-16)*Ky;
03218
03219 tmp=(y+cbB)>>16;
03220 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03221 tmp=(y-crG-cbG)>>16;
03222 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03223 tmp=(y+crR)>>16;
03224 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03225 bufoute++;
03226
03227 y=(bufy[j+yskip]-16)*Ky;
03228
03229 tmp=(y+cbB)>>16;
03230 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03231 tmp=(y-crG-cbG)>>16;
03232 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03233 tmp=(y+crR)>>16;
03234 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03235 bufouto++;
03236
03237 y=(bufy[j+1+yskip]-16)*Ky;
03238
03239 tmp=(y+cbB)>>16;
03240 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03241 tmp=(y-crG-cbG)>>16;
03242 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03243 tmp=(y+crR)>>16;
03244 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03245 bufouto++;
03246
03247 }
03248 bufoute+=oskip;
03249 bufouto+=oskip;
03250 bufy+=yskip<<1;
03251 }
03252 }
03253
03254 void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride)
03255 {
03256 int tmp;
03257 int i, j;
03258 __s32 y, crR, crG, cbG, cbB;
03259 __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
03260 int oskip, yskip;
03261
03262 if(stride==0)
03263 oskip=RTjpeg_width*3;
03264 else
03265 oskip=2*stride - RTjpeg_width*3;
03266
03267 yskip=RTjpeg_width;
03268
03269 bufcb=&buf[RTjpeg_width*RTjpeg_height];
03270 bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
03271 bufy=&buf[0];
03272 bufoute=rgb;
03273 bufouto=rgb+RTjpeg_width*3;
03274
03275 for(i=0; i<(RTjpeg_height>>1); i++)
03276 {
03277 for(j=0; j<RTjpeg_width; j+=2)
03278 {
03279 crR=(*bufcr-128)*KcrR;
03280 crG=(*(bufcr++)-128)*KcrG;
03281 cbG=(*bufcb-128)*KcbG;
03282 cbB=(*(bufcb++)-128)*KcbB;
03283
03284 y=(bufy[j]-16)*Ky;
03285
03286 tmp=(y+cbB)>>16;
03287 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03288 tmp=(y-crG-cbG)>>16;
03289 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03290 tmp=(y+crR)>>16;
03291 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03292
03293 y=(bufy[j+1]-16)*Ky;
03294
03295 tmp=(y+cbB)>>16;
03296 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03297 tmp=(y-crG-cbG)>>16;
03298 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03299 tmp=(y+crR)>>16;
03300 *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
03301
03302 y=(bufy[j+yskip]-16)*Ky;
03303
03304 tmp=(y+cbB)>>16;
03305 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03306 tmp=(y-crG-cbG)>>16;
03307 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03308 tmp=(y+crR)>>16;
03309 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03310
03311 y=(bufy[j+1+yskip]-16)*Ky;
03312
03313 tmp=(y+cbB)>>16;
03314 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03315 tmp=(y-crG-cbG)>>16;
03316 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03317 tmp=(y+crR)>>16;
03318 *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
03319
03320 }
03321 bufoute+=oskip;
03322 bufouto+=oskip;
03323 bufy+=yskip<<1;
03324 }
03325 }
03326
03327 void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride)
03328 {
03329 int tmp;
03330 int i, j;
03331 __s32 y, crR, crG, cbG, cbB;
03332 __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
03333 int oskip, yskip;
03334 unsigned char r, g, b;
03335
03336 if(stride==0)
03337 oskip=RTjpeg_width*2;
03338 else
03339 oskip=2*stride-RTjpeg_width*2;
03340
03341 yskip=RTjpeg_width;
03342
03343 bufcb=&buf[RTjpeg_width*RTjpeg_height];
03344 bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
03345 bufy=&buf[0];
03346 bufoute=rgb;
03347 bufouto=rgb+RTjpeg_width*2;
03348
03349 for(i=0; i<(RTjpeg_height>>1); i++)
03350 {
03351 for(j=0; j<RTjpeg_width; j+=2)
03352 {
03353 crR=(*bufcr-128)*KcrR;
03354 crG=(*(bufcr++)-128)*KcrG;
03355 cbG=(*bufcb-128)*KcbG;
03356 cbB=(*(bufcb++)-128)*KcbB;
03357
03358 y=(bufy[j]-16)*Ky;
03359
03360 tmp=(y+cbB)>>16;
03361 b=(tmp>255)?255:((tmp<0)?0:tmp);
03362 tmp=(y-crG-cbG)>>16;
03363 g=(tmp>255)?255:((tmp<0)?0:tmp);
03364 tmp=(y+crR)>>16;
03365 r=(tmp>255)?255:((tmp<0)?0:tmp);
03366 tmp=(int)((int)b >> 3);
03367 tmp|=(int)(((int)g >> 2) << 5);
03368 tmp|=(int)(((int)r >> 3) << 11);
03369 *(bufoute++)=tmp&0xff;
03370 *(bufoute++)=tmp>>8;
03371
03372
03373 y=(bufy[j+1]-16)*Ky;
03374
03375 tmp=(y+cbB)>>16;
03376 b=(tmp>255)?255:((tmp<0)?0:tmp);
03377 tmp=(y-crG-cbG)>>16;
03378 g=(tmp>255)?255:((tmp<0)?0:tmp);
03379 tmp=(y+crR)>>16;
03380 r=(tmp>255)?255:((tmp<0)?0:tmp);
03381 tmp=(int)((int)b >> 3);
03382 tmp|=(int)(((int)g >> 2) << 5);
03383 tmp|=(int)(((int)r >> 3) << 11);
03384 *(bufoute++)=tmp&0xff;
03385 *(bufoute++)=tmp>>8;
03386
03387 y=(bufy[j+yskip]-16)*Ky;
03388
03389 tmp=(y+cbB)>>16;
03390 b=(tmp>255)?255:((tmp<0)?0:tmp);
03391 tmp=(y-crG-cbG)>>16;
03392 g=(tmp>255)?255:((tmp<0)?0:tmp);
03393 tmp=(y+crR)>>16;
03394 r=(tmp>255)?255:((tmp<0)?0:tmp);
03395 tmp=(int)((int)b >> 3);
03396 tmp|=(int)(((int)g >> 2) << 5);
03397 tmp|=(int)(((int)r >> 3) << 11);
03398 *(bufouto++)=tmp&0xff;
03399 *(bufouto++)=tmp>>8;
03400
03401 y=(bufy[j+1+yskip]-16)*Ky;
03402
03403 tmp=(y+cbB)>>16;
03404 b=(tmp>255)?255:((tmp<0)?0:tmp);
03405 tmp=(y-crG-cbG)>>16;
03406 g=(tmp>255)?255:((tmp<0)?0:tmp);
03407 tmp=(y+crR)>>16;
03408 r=(tmp>255)?255:((tmp<0)?0:tmp);
03409 tmp=(int)((int)b >> 3);
03410 tmp|=(int)(((int)g >> 2) << 5);
03411 tmp|=(int)(((int)r >> 3) << 11);
03412 *(bufouto++)=tmp&0xff;
03413 *(bufouto++)=tmp>>8;
03414
03415 }
03416 bufoute+=oskip;
03417 bufouto+=oskip;
03418 bufy+=yskip<<1;
03419 }
03420 }
03421
03422
03423
03424 void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride)
03425 {
03426 bcopy(buf, rgb, RTjpeg_width*RTjpeg_height);
03427 }
03428