00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "kmimemagic.h"
00019 #include <kdebug.h>
00020 #include <kapplication.h>
00021 #include <qfile.h>
00022 #include <ksimpleconfig.h>
00023 #include <kstandarddirs.h>
00024 #include <kstaticdeleter.h>
00025 #include <klargefile.h>
00026 #include <assert.h>
00027
00028 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00029 static void process(struct config_rec* conf, const QString &);
00030 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00031 static int tagmagic(unsigned char *buf, int nbytes);
00032 static int textmagic(struct config_rec* conf, unsigned char *, int);
00033
00034 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00035 static int match(struct config_rec* conf, unsigned char *, int);
00036
00037 KMimeMagic* KMimeMagic::s_pSelf;
00038 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00039
00040 KMimeMagic* KMimeMagic::self()
00041 {
00042 if( !s_pSelf )
00043 initStatic();
00044 return s_pSelf;
00045 }
00046
00047 void KMimeMagic::initStatic()
00048 {
00049 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00050 s_pSelf->setFollowLinks( true );
00051 }
00052
00053 #include <stdio.h>
00054 #include <unistd.h>
00055 #include <stdlib.h>
00056 #include <sys/wait.h>
00057 #include <sys/types.h>
00058 #include <sys/stat.h>
00059 #include <fcntl.h>
00060 #include <errno.h>
00061 #include <ctype.h>
00062 #include <time.h>
00063 #include <utime.h>
00064 #include <stdarg.h>
00065 #include <qregexp.h>
00066 #include <qstring.h>
00067
00068
00069
00070
00071
00072
00073
00074
00075 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00076 #define DEBUG_LINENUMBERS
00077 #endif
00078
00079
00080
00081
/*
 * Integer status codes used by the helpers in this file
 * (for example signextend() returns ERROR for an unknown rule type).
 */
#define DECLINED 999
#define ERROR    998
#define OK       0

/*
 * MIME type strings written into config_rec::resultBuf.
 */

/* Results for content that could not be classified or read. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"

/* Results chosen by fsmagic() from the file mode alone. */
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

/* Further type strings; their users are outside the visible part of the file. */
#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"
00104
#define MAXMIMESTRING 256

#define HOWMANY   1024  /* number of bytes process() reads from a file */
#define MAXDESC   50    /* size of magic::desc, terminator included */
#define MAXstring 64    /* size of VALUETYPE::s, terminator included */

/*
 * One value as it is compared by the magic rules.
 *
 * The same storage is viewed either as a native integer (b, h, l), as a
 * string (s) or as raw bytes (hs, hl) from which mconvert() assembles a
 * fixed-endian short or long.
 */
typedef union VALUETYPE {
    unsigned char  b;            /* BYTE rules */
    unsigned short h;            /* SHORT family of rules */
    unsigned long  l;            /* LONG and DATE families of rules */
    char           s[MAXstring]; /* STRING rules */
    unsigned char  hs[2];        /* raw bytes behind a short */
    unsigned char  hl[4];        /* raw bytes behind a long */
} VALUETYPE;
00119
00120 struct magic {
00121 struct magic *next;
00122 #ifdef DEBUG_LINENUMBERS
00123 int lineno;
00124 #endif
00125
00126 short flag;
00127 #define INDIR 1
00128 #define UNSIGNED 2
00129 short cont_level;
00130 struct {
00131 char type;
00132 long offset;
00133 } in;
00134 long offset;
00135 unsigned char reln;
00136 char type;
00137 char vallen;
00138 #define BYTE 1
00139 #define SHORT 2
00140 #define LONG 4
00141 #define STRING 5
00142 #define DATE 6
00143 #define BESHORT 7
00144 #define BELONG 8
00145 #define BEDATE 9
00146 #define LESHORT 10
00147 #define LELONG 11
00148 #define LEDATE 12
00149 VALUETYPE value;
00150 unsigned long mask;
00151 char nospflag;
00152
00153
00154 char desc[MAXDESC];
00155 };
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
/* Sizes used by the tar header layout below. */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

/*
 * One RECORDSIZE-byte tar record, viewable either as raw bytes or as the
 * header fields.
 */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/*
 * Magic string expected in header.magic.
 * NOTE(review): the old GNU tar magic is usually "ustar" followed by two
 * spaces; confirm the spacing of this literal against the original file.
 */
#define TMAGIC "ustar "
00198
00199
00200
00201
00202 static int is_tar(unsigned char *, int);
00203 static unsigned long signextend(struct magic *, unsigned long);
00204 static int getvalue(struct magic *, char **);
00205 static int hextoint(int);
00206 static char *getstr(char *, char *, int, int *);
00207 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00208 static int mcheck(union VALUETYPE *, struct magic *);
00209 static int mconvert(union VALUETYPE *, struct magic *);
00210 static long from_oct(int, char *);
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
/*
 * Language flags used in the `names` keyword table. Each L_xxx value is
 * the single bit whose number is the matching P_xxx constant.
 */
#define L_HTML 0x001
#define L_C    0x002
#define L_MAKE 0x004
#define L_PLI  0x008
#define L_MACH 0x010
#define L_PAS  0x020
#define L_JAVA 0x040
#define L_CPP  0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400

/* Index of each language in the `types` table below. */
#define P_HTML 0
#define P_C    1
#define P_MAKE 2
#define P_PLI  3
#define P_MACH 4
#define P_PAS  5
#define P_JAVA 6
#define P_CPP  7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10

/*
 * Per-language data for text detection.
 * NOTE(review): kwords and weight are consumed outside the visible part of
 * the file; presumably a keyword count and a score multiplier. Confirm.
 */
typedef struct asc_type {
    const char *type;   /* MIME type reported for the language */
    int kwords;
    double weight;
} asc_type;

/* Indexed by the P_xxx constants. */
static const asc_type types[] = {
    { "text/html",        19, 2   },   /* P_HTML */
    { "text/x-c",          9, 1.3 },   /* P_C    */
    { "text/x-makefile",   4, 1.9 },   /* P_MAKE */
    { "text/x-pli",        1, 3   },   /* P_PLI  */
    { "text/x-assembler",  6, 2.1 },   /* P_MACH */
    { "text/x-pascal",     1, 1   },   /* P_PAS  */
    { "text/x-java",      14, 1   },   /* P_JAVA */
    { "text/x-c++",       14, 1   },   /* P_CPP  */
    { "message/rfc822",    4, 1.9 },   /* P_MAIL */
    { "message/news",      3, 2   },   /* P_NEWS */
    { "text/x-diff",       4, 2   }    /* P_DIFF */
};

/* Number of entries in `types`. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271
00272 static struct names {
00273 const char *name;
00274 short type;
00275 } const names[] = {
00276 {
00277 "<html", L_HTML
00278 },
00279 {
00280 "<HTML", L_HTML
00281 },
00282 {
00283 "<head", L_HTML
00284 },
00285 {
00286 "<HEAD", L_HTML
00287 },
00288 {
00289 "<body", L_HTML
00290 },
00291 {
00292 "<BODY", L_HTML
00293 },
00294 {
00295 "<title", L_HTML
00296 },
00297 {
00298 "<TITLE", L_HTML
00299 },
00300 {
00301 "<h1", L_HTML
00302 },
00303 {
00304 "<H1", L_HTML
00305 },
00306 {
00307 "<a", L_HTML
00308 },
00309 {
00310 "<A", L_HTML
00311 },
00312 {
00313 "<img", L_HTML
00314 },
00315 {
00316 "<IMG", L_HTML
00317 },
00318 {
00319 "<!--", L_HTML
00320 },
00321 {
00322 "<!doctype", L_HTML
00323 },
00324 {
00325 "<!DOCTYPE", L_HTML
00326 },
00327 {
00328 "<div", L_HTML
00329 },
00330 {
00331 "<DIV", L_HTML
00332 },
00333 {
00334 "<frame", L_HTML
00335 },
00336 {
00337 "<FRAME", L_HTML
00338 },
00339 {
00340 "<frameset", L_HTML
00341 },
00342 {
00343 "<FRAMESET", L_HTML
00344 },
00345 {
00346 "<script", L_HTML
00347 },
00348 {
00349 "<SCRIPT", L_HTML
00350 },
00351 {
00352 "/*", L_C|L_CPP|L_JAVA
00353 },
00354 {
00355 "//", L_CPP|L_JAVA
00356 },
00357 {
00358 "#include", L_C|L_CPP
00359 },
00360 {
00361 "char", L_C|L_CPP|L_JAVA
00362 },
00363 {
00364 "double", L_C|L_CPP|L_JAVA
00365 },
00366 {
00367 "extern", L_C|L_CPP
00368 },
00369 {
00370 "float", L_C|L_CPP|L_JAVA
00371 },
00372 {
00373 "real", L_C|L_CPP|L_JAVA
00374 },
00375 {
00376 "struct", L_C|L_CPP
00377 },
00378 {
00379 "union", L_C|L_CPP
00380 },
00381 {
00382 "implements", L_JAVA
00383 },
00384 {
00385 "super", L_JAVA
00386 },
00387 {
00388 "import", L_JAVA
00389 },
00390 {
00391 "class", L_CPP|L_JAVA
00392 },
00393 {
00394 "public", L_CPP|L_JAVA
00395 },
00396 {
00397 "private", L_CPP|L_JAVA
00398 },
00399 {
00400 "CFLAGS", L_MAKE
00401 },
00402 {
00403 "LDFLAGS", L_MAKE
00404 },
00405 {
00406 "all:", L_MAKE
00407 },
00408 {
00409 ".PHONY:", L_MAKE
00410 },
00411 {
00412 "srcdir", L_MAKE
00413 },
00414 {
00415 "exec_prefix", L_MAKE
00416 },
00417
00418
00419
00420
00421 {
00422 ".ascii", L_MACH
00423 },
00424 {
00425 ".asciiz", L_MACH
00426 },
00427 {
00428 ".byte", L_MACH
00429 },
00430 {
00431 ".even", L_MACH
00432 },
00433 {
00434 ".globl", L_MACH
00435 },
00436 {
00437 "clr", L_MACH
00438 },
00439 {
00440 "(input", L_PAS
00441 },
00442 {
00443 "dcl", L_PLI
00444 },
00445 {
00446 "Received:", L_MAIL
00447 },
00448
00449
00450
00451 {
00452 "Return-Path:", L_MAIL
00453 },
00454 {
00455 "Cc:", L_MAIL
00456 },
00457 {
00458 "Newsgroups:", L_NEWS
00459 },
00460 {
00461 "Path:", L_NEWS
00462 },
00463 {
00464 "Organization:", L_NEWS
00465 },
00466 {
00467 "---", L_DIFF
00468 },
00469 {
00470 "+++", L_DIFF
00471 },
00472 {
00473 "***", L_DIFF
00474 },
00475 {
00476 "@@", L_DIFF
00477 },
00478 {
00479 NULL, 0
00480 }
00481 };
00482
00493 class KMimeMagicUtimeConf
00494 {
00495 public:
00496 KMimeMagicUtimeConf()
00497 {
00498 tmpDirs << QString::fromLatin1("/tmp");
00499
00500
00501
00502 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00503 if ( !confDirs.isEmpty() )
00504 {
00505 QString globalConf = confDirs.last() + "kmimemagicrc";
00506 if ( QFile::exists( globalConf ) )
00507 {
00508 KSimpleConfig cfg( globalConf );
00509 cfg.setGroup( "Settings" );
00510 tmpDirs = cfg.readListEntry( "atimeDirs" );
00511 }
00512 if ( confDirs.count() > 1 )
00513 {
00514 QString localConf = confDirs.first() + "kmimemagicrc";
00515 if ( QFile::exists( localConf ) )
00516 {
00517 KSimpleConfig cfg( localConf );
00518 cfg.setGroup( "Settings" );
00519 tmpDirs += cfg.readListEntry( "atimeDirs" );
00520 }
00521 }
00522 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00523 {
00524 QString dir = *it;
00525 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00526 (*it) += '/';
00527 }
00528 }
00529 #if 0
00530
00531 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00532 kdDebug(7018) << " atimeDir: " << *it << endl;
00533 #endif
00534 }
00535
00536 bool restoreAccessTime( const QString & file ) const
00537 {
00538 QString dir = file.left( file.findRev( '/' ) );
00539 bool res = tmpDirs.contains( dir );
00540
00541 return res;
00542 }
00543 QStringList tmpDirs;
00544 };
00545
00546
00547 struct config_rec {
00548 bool followLinks;
00549 QString resultBuf;
00550 int accuracy;
00551
00552 struct magic *magic,
00553 *last;
00554 KMimeMagicUtimeConf * utimeConf;
00555 };
00556
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid, compiled only with MIME_MAGIC_DEBUG_TABLE: walk the rule list
 * and report the first node whose address, read as four bytes, consists
 * only of printable characters. Such an address suggests that a `next`
 * pointer was overwritten with text.
 *
 * The report names the line of the previous rule, or -1 for the first one.
 *
 * NOTE(review): `conf` is not declared in the scope of this free function
 * in the visible code, and magic::lineno only exists when
 * DEBUG_LINENUMBERS is defined, so enabling MIME_MAGIC_DEBUG_TABLE may
 * need further work. Confirm before relying on it.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        const int b3 = (int) ((addr >> 24) & 255);
        const int b2 = (int) ((addr >> 16) & 255);
        const int b1 = (int) ((addr >> 8) & 255);
        const int b0 = (int) (addr & 255);

        if (isprint(b3) && isprint(b2) && isprint(b1) && isprint(b0)) {
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) b3 << (char) b2 << (char) b1 << (char) b0
                          << "\" line=" << (prevm ? prevm->lineno : -1) << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00583
/*
 * Advance the character pointer named `l` in the enclosing scope past any
 * run of 7-bit ASCII whitespace. Use as a statement: EATAB;
 */
#define EATAB { while ((unsigned char) *l < 0x80 && \
                       isspace((unsigned char) *l)) l++; }
00586
00587 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00588 {
00589 int ws_offset;
00590
00591
00592 if (line[0]) {
00593 line[strlen(line) - 1] = '\0';
00594 }
00595
00596 ws_offset = 0;
00597 while (line[ws_offset] && isspace(line[ws_offset])) {
00598 ws_offset++;
00599 }
00600
00601
00602 if (line[ws_offset] == 0) {
00603 return 0;
00604 }
00605
00606 if (line[ws_offset] == '#')
00607 return 0;
00608
00609
00610 (*rule)++;
00611
00612
00613 return (parse(line + ws_offset, lineno) != 0);
00614 }
00615
00616
00617
00618
00619 int KMimeMagic::apprentice( const QString& magicfile )
00620 {
00621 FILE *f;
00622 char line[BUFSIZ + 1];
00623 int errs = 0;
00624 int lineno;
00625 int rule = 0;
00626 QCString fname;
00627
00628 if (magicfile.isEmpty())
00629 return -1;
00630 fname = QFile::encodeName(magicfile);
00631 f = fopen(fname, "r");
00632 if (f == NULL) {
00633 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00634 return -1;
00635 }
00636
00637
00638 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00639 if (parse_line(line, &rule, lineno))
00640 errs++;
00641
00642 fclose(f);
00643
00644 #ifdef DEBUG_APPRENTICE
00645 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00646 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00647 #endif
00648
00649 #ifdef MIME_MAGIC_DEBUG_TABLE
00650 test_table();
00651 #endif
00652
00653 return (errs ? -1 : 0);
00654 }
00655
00656 int KMimeMagic::buff_apprentice(char *buff)
00657 {
00658 char line[BUFSIZ + 2];
00659 int errs = 0;
00660 int lineno = 1;
00661 char *start = buff;
00662 char *end;
00663 int count = 0;
00664 int rule = 0;
00665 int len = strlen(buff) + 1;
00666
00667
00668 do {
00669 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00670 strncpy(line, start, count);
00671 line[count] = '\0';
00672 if ((end = strchr(line, '\n'))) {
00673 *(++end) = '\0';
00674 count = strlen(line);
00675 } else
00676 strcat(line, "\n");
00677 start += count;
00678 len -= count;
00679 if (parse_line(line, &rule, lineno))
00680 errs++;
00681 lineno++;
00682 } while (len > 0);
00683
00684 #ifdef DEBUG_APPRENTICE
00685 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00686 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00687 #endif
00688
00689 #ifdef MIME_MAGIC_DEBUG_TABLE
00690 test_table();
00691 #endif
00692
00693 return (errs ? -1 : 0);
00694 }
00695
00696
00697
00698
00699 static unsigned long
00700 signextend(struct magic *m, unsigned long v)
00701 {
00702 if (!(m->flag & UNSIGNED))
00703 switch (m->type) {
00704
00705
00706
00707
00708
00709 case BYTE:
00710 v = (char) v;
00711 break;
00712 case SHORT:
00713 case BESHORT:
00714 case LESHORT:
00715 v = (short) v;
00716 break;
00717 case DATE:
00718 case BEDATE:
00719 case LEDATE:
00720 case LONG:
00721 case BELONG:
00722 case LELONG:
00723 v = (long) v;
00724 break;
00725 case STRING:
00726 break;
00727 default:
00728 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00729 return ERROR;
00730 }
00731 return v;
00732 }
00733
00734
00735
00736
00737 int KMimeMagic::parse(char *l, int
00738 #ifdef DEBUG_LINENUMBERS
00739 lineno
00740 #endif
00741 )
00742 {
00743 int i = 0;
00744 struct magic *m;
00745 char *t,
00746 *s;
00747
00748 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00749 kdError(7018) << "parse: Out of memory." << endl;
00750 return -1;
00751 }
00752
00753 m->next = NULL;
00754 if (!conf->magic || !conf->last) {
00755 conf->magic = conf->last = m;
00756 } else {
00757 conf->last->next = m;
00758 conf->last = m;
00759 }
00760
00761
00762 m->flag = 0;
00763 m->cont_level = 0;
00764 #ifdef DEBUG_LINENUMBERS
00765 m->lineno = lineno;
00766 #endif
00767
00768 while (*l == '>') {
00769 ++l;
00770 m->cont_level++;
00771 }
00772
00773 if (m->cont_level != 0 && *l == '(') {
00774 ++l;
00775 m->flag |= INDIR;
00776 }
00777
00778 m->offset = (int) strtol(l, &t, 0);
00779 if (l == t) {
00780 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00781 }
00782 l = t;
00783
00784 if (m->flag & INDIR) {
00785 m->in.type = LONG;
00786 m->in.offset = 0;
00787
00788
00789
00790 if (*l == '.') {
00791 switch (*++l) {
00792 case 'l':
00793 m->in.type = LONG;
00794 break;
00795 case 's':
00796 m->in.type = SHORT;
00797 break;
00798 case 'b':
00799 m->in.type = BYTE;
00800 break;
00801 default:
00802 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00803 break;
00804 }
00805 l++;
00806 }
00807 s = l;
00808 if (*l == '+' || *l == '-')
00809 l++;
00810 if (isdigit((unsigned char) *l)) {
00811 m->in.offset = strtol(l, &t, 0);
00812 if (*s == '-')
00813 m->in.offset = -m->in.offset;
00814 } else
00815 t = l;
00816 if (*t++ != ')') {
00817 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00818 }
00819 l = t;
00820 }
00821 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00822 ++l;
00823 EATAB;
00824
00825 #define NBYTE 4
00826 #define NSHORT 5
00827 #define NLONG 4
00828 #define NSTRING 6
00829 #define NDATE 4
00830 #define NBESHORT 7
00831 #define NBELONG 6
00832 #define NBEDATE 6
00833 #define NLESHORT 7
00834 #define NLELONG 6
00835 #define NLEDATE 6
00836
00837 if (*l == 'u') {
00838 ++l;
00839 m->flag |= UNSIGNED;
00840 }
00841
00842 if (strncmp(l, "byte", NBYTE) == 0) {
00843 m->type = BYTE;
00844 l += NBYTE;
00845 } else if (strncmp(l, "short", NSHORT) == 0) {
00846 m->type = SHORT;
00847 l += NSHORT;
00848 } else if (strncmp(l, "long", NLONG) == 0) {
00849 m->type = LONG;
00850 l += NLONG;
00851 } else if (strncmp(l, "string", NSTRING) == 0) {
00852 m->type = STRING;
00853 l += NSTRING;
00854 } else if (strncmp(l, "date", NDATE) == 0) {
00855 m->type = DATE;
00856 l += NDATE;
00857 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00858 m->type = BESHORT;
00859 l += NBESHORT;
00860 } else if (strncmp(l, "belong", NBELONG) == 0) {
00861 m->type = BELONG;
00862 l += NBELONG;
00863 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00864 m->type = BEDATE;
00865 l += NBEDATE;
00866 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00867 m->type = LESHORT;
00868 l += NLESHORT;
00869 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00870 m->type = LELONG;
00871 l += NLELONG;
00872 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00873 m->type = LEDATE;
00874 l += NLEDATE;
00875 } else {
00876 kdError(7018) << "parse: type " << l << " invalid" << endl;
00877 return -1;
00878 }
00879
00880 if (*l == '&') {
00881 ++l;
00882 m->mask = signextend(m, strtol(l, &l, 0));
00883 } else
00884 m->mask = (unsigned long) ~0L;
00885 EATAB;
00886
00887 switch (*l) {
00888 case '>':
00889 case '<':
00890
00891 case '&':
00892 case '^':
00893 case '=':
00894 m->reln = *l;
00895 ++l;
00896 break;
00897 case '!':
00898 if (m->type != STRING) {
00899 m->reln = *l;
00900 ++l;
00901 break;
00902 }
00903
00904 default:
00905 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00906 isspace((unsigned char) l[1])) {
00907 m->reln = *l;
00908 ++l;
00909 goto GetDesc;
00910 }
00911 m->reln = '=';
00912 break;
00913 }
00914 EATAB;
00915
00916 if (getvalue(m, &l))
00917 return -1;
00918
00919
00920
00921 GetDesc:
00922 EATAB;
00923 if (l[0] == '\b') {
00924 ++l;
00925 m->nospflag = 1;
00926 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00927 ++l;
00928 ++l;
00929 m->nospflag = 1;
00930 } else
00931 m->nospflag = 0;
00932
00933 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00934 m->desc[i++] = *l++;
00935 m->desc[i] = '\0';
00936
00937 while (--i>0 && isspace( m->desc[i] ))
00938 m->desc[i] = '\0';
00939
00940
00941
00942
00943 #ifdef DEBUG_APPRENTICE
00944 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00945 #endif
00946 return 0;
00947 }
00948
00949
00950
00951
00952
00953
00954 static int
00955 getvalue(struct magic *m, char **p)
00956 {
00957 int slen;
00958
00959 if (m->type == STRING) {
00960 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00961 m->vallen = slen;
00962 } else if (m->reln != 'x')
00963 m->value.l = signextend(m, strtol(*p, p, 0));
00964 return 0;
00965 }
00966
00967
00968
00969
00970
00971
00972 static char *
00973 getstr(register char *s, register char *p, int plen, int *slen)
00974 {
00975 char *origs = s,
00976 *origp = p;
00977 char *pmax = p + plen - 1;
00978 register int c;
00979 register int val;
00980
00981 while ((c = *s++) != '\0') {
00982 if (isspace((unsigned char) c))
00983 break;
00984 if (p >= pmax) {
00985 kdError(7018) << "String too long: " << origs << endl;
00986 break;
00987 }
00988 if (c == '\\') {
00989 switch (c = *s++) {
00990
00991 case '\0':
00992 goto out;
00993
00994 default:
00995 *p++ = (char) c;
00996 break;
00997
00998 case 'n':
00999 *p++ = '\n';
01000 break;
01001
01002 case 'r':
01003 *p++ = '\r';
01004 break;
01005
01006 case 'b':
01007 *p++ = '\b';
01008 break;
01009
01010 case 't':
01011 *p++ = '\t';
01012 break;
01013
01014 case 'f':
01015 *p++ = '\f';
01016 break;
01017
01018 case 'v':
01019 *p++ = '\v';
01020 break;
01021
01022
01023 case '0':
01024 case '1':
01025 case '2':
01026 case '3':
01027 case '4':
01028 case '5':
01029 case '6':
01030 case '7':
01031 val = c - '0';
01032 c = *s++;
01033 if (c >= '0' && c <= '7') {
01034 val = (val << 3) | (c - '0');
01035 c = *s++;
01036 if (c >= '0' && c <= '7')
01037 val = (val << 3) | (c - '0');
01038 else
01039 --s;
01040 } else
01041 --s;
01042 *p++ = (char) val;
01043 break;
01044
01045
01046 case 'x':
01047 val = 'x';
01048 c = hextoint(*s++);
01049 if (c >= 0) {
01050 val = c;
01051 c = hextoint(*s++);
01052 if (c >= 0) {
01053 val = (val << 4) + c;
01054 c = hextoint(*s++);
01055 if (c >= 0) {
01056 val = (val << 4) + c;
01057 } else
01058 --s;
01059 } else
01060 --s;
01061 } else
01062 --s;
01063 *p++ = (char) val;
01064 break;
01065 }
01066 } else
01067 *p++ = (char) c;
01068 }
01069 out:
01070 *p = '\0';
01071 *slen = p - origp;
01072
01073
01074 return s;
01075 }
01076
01077
01078
/*
 * Return the value of the hexadecimal digit `c` (0..15), or -1 when `c`
 * is not a 7-bit ASCII hexadecimal digit.
 */
static int
hextoint(int c)
{
    const unsigned char byte = (unsigned char) c;

    /* anything outside 7-bit ASCII is rejected first */
    if (byte > 0x7f)
        return -1;

    if (isdigit(byte))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}
01092
01093
01094
01095
01096 static int
01097 mconvert(union VALUETYPE *p, struct magic *m)
01098 {
01099 switch (m->type) {
01100 case BYTE:
01101 return 1;
01102 case STRING:
01103
01104 p->s[sizeof(p->s) - 1] = '\0';
01105 return 1;
01106 #ifndef WORDS_BIGENDIAN
01107 case SHORT:
01108 #endif
01109 case BESHORT:
01110 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01111 return 1;
01112 #ifndef WORDS_BIGENDIAN
01113 case LONG:
01114 case DATE:
01115 #endif
01116 case BELONG:
01117 case BEDATE:
01118 p->l = (long)
01119 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01120 return 1;
01121 #ifdef WORDS_BIGENDIAN
01122 case SHORT:
01123 #endif
01124 case LESHORT:
01125 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01126 return 1;
01127 #ifdef WORDS_BIGENDIAN
01128 case LONG:
01129 case DATE:
01130 #endif
01131 case LELONG:
01132 case LEDATE:
01133 p->l = (long)
01134 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01135 return 1;
01136 default:
01137 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01138 return 0;
01139 }
01140 }
01141
01142
01143 static int
01144 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01145 int nbytes)
01146 {
01147 long offset = m->offset;
01148
01149
01150
01151 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01152 {
01153 int have = nbytes - offset;
01154 memset(p, 0, sizeof(union VALUETYPE));
01155 if (have > 0)
01156 memcpy(p, s + offset, have);
01157 } else
01158 memcpy(p, s + offset, sizeof(union VALUETYPE));
01159
01160 if (!mconvert(p, m))
01161 return 0;
01162
01163 if (m->flag & INDIR) {
01164
01165 switch (m->in.type) {
01166 case BYTE:
01167 offset = p->b + m->in.offset;
01168 break;
01169 case SHORT:
01170 offset = p->h + m->in.offset;
01171 break;
01172 case LONG:
01173 offset = p->l + m->in.offset;
01174 break;
01175 }
01176
01177 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01178 return 0;
01179
01180 memcpy(p, s + offset, sizeof(union VALUETYPE));
01181
01182 if (!mconvert(p, m))
01183 return 0;
01184 }
01185 return 1;
01186 }
01187
01188 static int
01189 mcheck(union VALUETYPE *p, struct magic *m)
01190 {
01191 register unsigned long l = m->value.l;
01192 register unsigned long v;
01193 int matched;
01194
01195 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01196 kdError(7018) << "BOINK" << endl;
01197 return 1;
01198 }
01199 switch (m->type) {
01200 case BYTE:
01201 v = p->b;
01202 break;
01203
01204 case SHORT:
01205 case BESHORT:
01206 case LESHORT:
01207 v = p->h;
01208 break;
01209
01210 case LONG:
01211 case BELONG:
01212 case LELONG:
01213 case DATE:
01214 case BEDATE:
01215 case LEDATE:
01216 v = p->l;
01217 break;
01218
01219 case STRING:
01220 l = 0;
01221
01222
01223
01224
01225
01226 v = 0;
01227 {
01228 register unsigned char *a = (unsigned char *) m->value.s;
01229 register unsigned char *b = (unsigned char *) p->s;
01230 register int len = m->vallen;
01231 Q_ASSERT(len);
01232
01233 while (--len >= 0)
01234 if ((v = *b++ - *a++) != 0)
01235 break;
01236 }
01237 break;
01238 default:
01239 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01240 return 0;
01241 }
01242 #if 0
01243 qDebug("Before signextend %08x", v);
01244 #endif
01245 v = signextend(m, v) & m->mask;
01246 #if 0
01247 qDebug("After signextend %08x", v);
01248 #endif
01249
01250 switch (m->reln) {
01251 case 'x':
01252 matched = 1;
01253 break;
01254
01255 case '!':
01256 matched = v != l;
01257 break;
01258
01259 case '=':
01260 matched = v == l;
01261 break;
01262
01263 case '>':
01264 if (m->flag & UNSIGNED)
01265 matched = v > l;
01266 else
01267 matched = (long) v > (long) l;
01268 break;
01269
01270 case '<':
01271 if (m->flag & UNSIGNED)
01272 matched = v < l;
01273 else
01274 matched = (long) v < (long) l;
01275 break;
01276
01277 case '&':
01278 matched = (v & l) == l;
01279 break;
01280
01281 case '^':
01282 matched = (v & l) != l;
01283 break;
01284
01285 default:
01286 matched = 0;
01287 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01288 break;
01289 }
01290
01291 return matched;
01292 }
01293
#if 0
/*
 * Disabled code: split the result buffer into "type/subtype" and an
 * optional encoding word that follows after whitespace.
 */

/* states of the scanner below */
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;

/*
 * Scan the result buffer and store the MIME type, and the encoding if one
 * is present, in magicResult.
 *
 * Returns OK on success, DECLINED when the text is not of the form
 * "type/subtype [encoding]" and -1 when a stored value reads back empty.
 */
int
KMimeMagic::finishResult()
{
    int type_pos = 0;
    int type_len = 0;
    int encoding_pos = 0;
    int encoding_len = 0;
    int state = rsl_leading_space;
    bool done = false;

    /* loop through the characters in the result buffer */
    for (int cur_pos = 0; !done && cur_pos < (int)resultBuf.length(); cur_pos++) {
        const bool space = resultBuf[cur_pos].isSpace();

        switch (state) {
            case rsl_leading_space:
                /* the first non-space character starts the type */
                if (!space) {
                    state = rsl_type;
                    type_pos = cur_pos;
                    type_len = 1;
                }
                break;

            case rsl_type:
                /* whitespace before the '/' is not acceptable */
                if (space)
                    return DECLINED;
                type_len++;
                if (resultBuf.at(cur_pos) == '/')
                    state = rsl_subtype;
                break;

            case rsl_subtype:
                /* whitespace ends the subtype */
                if (space)
                    state = rsl_separator;
                else
                    type_len++;
                break;

            case rsl_separator:
                /* the next non-space character starts the encoding */
                if (!space) {
                    state = rsl_encoding;
                    encoding_pos = cur_pos;
                    encoding_len = 1;
                }
                break;

            case rsl_encoding:
                /* whitespace after the encoding ends the scan */
                if (space)
                    done = true;
                else
                    encoding_len++;
                break;

            default:
                /* should not be possible */
                if (space)
                    kdError(7018) << "KMimeMagic::finishResult: bad state " << state << " (ws)" << endl;
                else
                    kdError(7018) << " KMimeMagic::finishResult: bad state " << state << " (ns)" << endl;
                return DECLINED;
        }
    }

    /* a complete type/subtype must have been seen */
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {
        return DECLINED;
    }

    /* save the results */
    if (state == rsl_subtype || state == rsl_encoding ||
        state == rsl_separator) {
        magicResult->setMimeType(conf->resultBuf.mid(type_pos, type_len).ascii());
    }
    if (state == rsl_encoding)
        magicResult->setEncoding(conf->resultBuf.mid(encoding_pos,
                                                     encoding_len).ascii());

    /* detect failures while storing */
    if (!magicResult->mimeType() ||
        (state == rsl_encoding && !magicResult->encoding())) {
        return -1;
    }

    /* success */
    return OK;
}
#endif
01407
01408
01409
01410
01411
01412
01413 static void process(struct config_rec* conf, const QString & fn)
01414 {
01415 int fd = 0;
01416 unsigned char buf[HOWMANY + 1];
01417 KDE_struct_stat sb;
01418 int nbytes = 0;
01419 int tagbytes = 0;
01420 QCString fileName = QFile::encodeName( fn );
01421
01422
01423
01424
01425 if (fsmagic(conf, fileName, &sb) != 0) {
01426
01427 return;
01428 }
01429 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01430
01431
01432
01433
01434
01435
01436 conf->resultBuf = MIME_BINARY_UNREADABLE;
01437 return;
01438 }
01439
01440
01441
01442 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01443 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01444 conf->resultBuf = MIME_BINARY_UNREADABLE;
01445 return;
01446 }
01447 if ((tagbytes = tagmagic(buf, nbytes))) {
01448
01449 lseek(fd, tagbytes, SEEK_SET);
01450 nbytes = read(fd, (char*)buf, HOWMANY);
01451 if (nbytes < 0) {
01452 conf->resultBuf = MIME_BINARY_UNREADABLE;
01453 return;
01454 }
01455 }
01456 if (nbytes == 0) {
01457 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01458 } else {
01459 buf[nbytes++] = '\0';
01460 tryit(conf, buf, nbytes);
01461 }
01462
01463 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01464 {
01465
01466
01467
01468
01469
01470 struct utimbuf utbuf;
01471 utbuf.actime = sb.st_atime;
01472 utbuf.modtime = sb.st_mtime;
01473 (void) utime(fileName, &utbuf);
01474 }
01475 (void) close(fd);
01476 }
01477
01478
01479 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01480 {
01481
01482 if (match(conf, buf, nb))
01483 return;
01484
01485
01486 if (ascmagic(conf, buf, nb) == 1)
01487 return;
01488
01489
01490 if (textmagic(conf, buf, nb))
01491 return;
01492
01493
01494 conf->resultBuf = MIME_BINARY_UNKNOWN;
01495 conf->accuracy = 0;
01496 }
01497
01498 static int
01499 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01500 {
01501 int ret = 0;
01502
01503
01504
01505
01506
01507 ret = KDE_lstat(fn, sb);
01508
01509 if (ret) {
01510 return 1;
01511
01512 }
01513
01514
01515
01516
01517
01518
01519 switch (sb->st_mode & S_IFMT) {
01520 case S_IFDIR:
01521 conf->resultBuf = MIME_INODE_DIR;
01522 return 1;
01523 case S_IFCHR:
01524 conf->resultBuf = MIME_INODE_CDEV;
01525 return 1;
01526 case S_IFBLK:
01527 conf->resultBuf = MIME_INODE_BDEV;
01528 return 1;
01529
01530 #ifdef S_IFIFO
01531 case S_IFIFO:
01532 conf->resultBuf = MIME_INODE_FIFO;
01533 return 1;
01534 #endif
01535 #ifdef S_IFLNK
01536 case S_IFLNK:
01537 {
01538 char buf[BUFSIZ + BUFSIZ + 4];
01539 register int nch;
01540 KDE_struct_stat tstatbuf;
01541
01542 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01543 conf->resultBuf = MIME_INODE_LINK;
01544
01545 return 1;
01546 }
01547 buf[nch] = '\0';
01548
01549 if (*buf == '/') {
01550 if (KDE_stat(buf, &tstatbuf) < 0) {
01551 conf->resultBuf = MIME_INODE_LINK;
01552
01553 return 1;
01554 }
01555 } else {
01556 char *tmp;
01557 char buf2[BUFSIZ + BUFSIZ + 4];
01558
01559 strncpy(buf2, fn, BUFSIZ);
01560 buf2[BUFSIZ] = 0;
01561
01562 if ((tmp = strrchr(buf2, '/')) == NULL) {
01563 tmp = buf;
01564 } else {
01565
01566 *++tmp = '\0';
01567 strcat(buf2, buf);
01568 tmp = buf2;
01569 }
01570 if (KDE_stat(tmp, &tstatbuf) < 0) {
01571 conf->resultBuf = MIME_INODE_LINK;
01572
01573 return 1;
01574 } else
01575 strcpy(buf, tmp);
01576 }
01577 if (conf->followLinks)
01578 process( conf, QFile::decodeName( buf ) );
01579 else
01580 conf->resultBuf = MIME_INODE_LINK;
01581 return 1;
01582 }
01583 return 1;
01584 #endif
01585 #ifdef S_IFSOCK
01586 #ifndef __COHERENT__
01587 case S_IFSOCK:
01588 conf->resultBuf = MIME_INODE_SOCK;
01589 return 1;
01590 #endif
01591 #endif
01592 case S_IFREG:
01593 break;
01594 default:
01595 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01596
01597 }
01598
01599
01600
01601
01602 if (sb->st_size == 0) {
01603 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01604 return 1;
01605 }
01606 return 0;
01607 }
01608
01609
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635 static int
01636 match(struct config_rec* conf, unsigned char *s, int nbytes)
01637 {
01638 int cont_level = 0;
01639 union VALUETYPE p;
01640 struct magic *m;
01641
01642 #ifdef DEBUG_MIMEMAGIC
01643 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01644 for (m = conf->magic; m; m = m->next) {
01645 if (isprint((((unsigned long) m) >> 24) & 255) &&
01646 isprint((((unsigned long) m) >> 16) & 255) &&
01647 isprint((((unsigned long) m) >> 8) & 255) &&
01648 isprint(((unsigned long) m) & 255)) {
01649 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01650 break;
01651 }
01652 }
01653 #endif
01654
01655 for (m = conf->magic; m; m = m->next) {
01656 #ifdef DEBUG_MIMEMAGIC
01657 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01658 #endif
01659 memset(&p, 0, sizeof(union VALUETYPE));
01660
01661
01662 if (!mget(&p, s, m, nbytes) ||
01663 !mcheck(&p, m)) {
01664 struct magic *m_cont;
01665
01666
01667
01668
01669 if (!m->next || (m->next->cont_level == 0)) {
01670 continue;
01671 }
01672 m_cont = m->next;
01673 while (m_cont && (m_cont->cont_level != 0)) {
01674 #ifdef DEBUG_MIMEMAGIC
01675 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01676 #endif
01677
01678
01679
01680
01681 m = m_cont;
01682 m_cont = m_cont->next;
01683 }
01684 continue;
01685 }
01686
01687
01688 #ifdef DEBUG_MIMEMAGIC
01689 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01690 #endif
01691
01692
01693 conf->resultBuf = m->desc;
01694
01695 cont_level++;
01696
01697
01698
01699
01700 m = m->next;
01701 while (m && (m->cont_level != 0)) {
01702 #ifdef DEBUG_MIMEMAGIC
01703 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01704 #endif
01705 if (cont_level >= m->cont_level) {
01706 if (cont_level > m->cont_level) {
01707
01708
01709
01710
01711 cont_level = m->cont_level;
01712 }
01713 if (mget(&p, s, m, nbytes) &&
01714 mcheck(&p, m)) {
01715
01716
01717
01718
01719
01720
01721 #ifdef DEBUG_MIMEMAGIC
01722 kdDebug(7018) << "continuation matched" << endl;
01723 #endif
01724 conf->resultBuf = m->desc;
01725 cont_level++;
01726 }
01727 }
01728
01729 m = m->next;
01730 }
01731
01732
01733 if ( !conf->resultBuf.isEmpty() )
01734 {
01735 #ifdef DEBUG_MIMEMAGIC
01736 kdDebug(7018) << "match: matched" << endl;
01737 #endif
01738 return 1;
01739 }
01740 }
01741 #ifdef DEBUG_MIMEMAGIC
01742 kdDebug(7018) << "match: failed" << endl;
01743 #endif
01744 return 0;
01745 }
01746
01747
01748
/*
 * Detect a leading "ID3" tag and compute how many bytes it occupies.
 *
 *   buf    - start of the data
 *   nbytes - number of bytes in buf
 *
 * Returns 0 when fewer than 40 bytes are available, when the data does
 * not start with "ID3", when byte 3 is greater than 4, or when any of the
 * low four bits of byte 5 is set.  Otherwise returns 10, plus another 10
 * when bit 0x10 of byte 5 is set (presumably the ID3v2.4 footer flag -
 * TODO confirm), plus the value of bytes 6..9 combined with 7 bits taken
 * from each byte, most significant first.
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;
    if (buf[3] > 4)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    int total = (buf[5] & 0x10) ? 20 : 10;

    total += (buf[6] << 21) + (buf[7] << 14) + (buf[8] << 7) + buf[9];
    return total;
}
01768
01769
01770
01771 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
01772
01773 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01774 {
01775 int i;
01776 double pct, maxpct, pctsum;
01777 double pcts[NTYPES];
01778 int mostaccurate, tokencount;
01779 int typeset, jonly, conly, jconly, cppcomm, ccomm;
01780 int has_escapes = 0;
01781 unsigned char *s;
01782 char nbuf[HOWMANY + 1];
01783 char *token;
01784 register const struct names *p;
01785 int typecount[NTYPES];
01786
01787
01788 conf->accuracy = 70;
01789
01790
01791
01792
01793
01794
01795 if (*buf == '.') {
01796 unsigned char *tp = buf + 1;
01797
01798 while (isascii(*tp) && isspace(*tp))
01799 ++tp;
01800 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01801 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01802 conf->resultBuf = MIME_APPL_TROFF;
01803 return 1;
01804 }
01805 }
01806 if ((*buf == 'c' || *buf == 'C') &&
01807 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01808
01809 conf->resultBuf = MIME_TEXT_FORTRAN;
01810 return 1;
01811 }
01812 assert(nbytes-1 < HOWMANY + 1);
01813
01814
01815 s = (unsigned char *) memcpy(nbuf, buf, nbytes);
01816 s[nbytes-1] = '\0';
01817 has_escapes = (memchr(s, '\033', nbytes) != NULL);
01818
01819
01820
01821
01822 memset(&typecount, 0, sizeof(typecount));
01823 typeset = 0;
01824 jonly = 0;
01825 conly = 0;
01826 jconly = 0;
01827 cppcomm = 0;
01828 ccomm = 0;
01829 tokencount = 0;
01830 bool foundClass = false;
01831
01832
01833 while ((token = strtok((char *) s, " \t\n\r\f,;>")) != NULL) {
01834 s = NULL;
01835 #ifdef DEBUG_MIMEMAGIC
01836 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01837 #endif
01838 for (p = names; p->name ; p++) {
01839 if (STREQ(p->name, token)) {
01840 #ifdef DEBUG_MIMEMAGIC
01841 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01842 #endif
01843 tokencount++;
01844 typeset |= p->type;
01845 if (p->type == L_JAVA)
01846 jonly++;
01847 if ((p->type & (L_C|L_CPP|L_JAVA))
01848 == (L_CPP|L_JAVA)) {
01849 jconly++;
01850 if ( !foundClass && STREQ("class", token) )
01851 foundClass = true;
01852 }
01853 if ((p->type & (L_C|L_CPP|L_JAVA))
01854 == (L_C|L_CPP))
01855 conly++;
01856 if (STREQ(token, "//"))
01857 cppcomm++;
01858 if (STREQ(token, "/*"))
01859 ccomm++;
01860 for (i = 0; i < (int)NTYPES; i++)
01861 if ((1 << i) & p->type)
01862 typecount[i]++;
01863 }
01864 }
01865 }
01866
01867 if (typeset & (L_C|L_CPP|L_JAVA)) {
01868 conf->accuracy = 40;
01869 if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01870 #ifdef DEBUG_MIMEMAGIC
01871 kdDebug(7018) << "C/C++/Java: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " ccomm=" << ccomm << endl;
01872 #endif
01873 if (jonly && conly)
01874
01875 if ( jonly > conly )
01876 conly = 0;
01877 else
01878 jonly = 0;
01879 if (jonly > 1 && foundClass) {
01880
01881 conf->resultBuf = QString(types[P_JAVA].type);
01882 return 1;
01883 }
01884 if (jconly > 1) {
01885
01886 if (typecount[P_JAVA] > typecount[P_CPP])
01887 conf->resultBuf = QString(types[P_JAVA].type);
01888 else
01889 conf->resultBuf = QString(types[P_CPP].type);
01890 return 1;
01891 }
01892 if (conly) {
01893
01894 if (cppcomm)
01895 conf->resultBuf = QString(types[P_CPP].type);
01896 else
01897 conf->resultBuf = QString(types[P_C].type);
01898 return 1;
01899 }
01900 if (ccomm) {
01901 conf->resultBuf = QString(types[P_C].type);
01902 return 1;
01903 }
01904 }
01905 }
01906
01907
01908
01909
01910
01911 mostaccurate = -1;
01912 maxpct = pctsum = 0.0;
01913 for (i = 0; i < (int)NTYPES; i++) {
01914 if (typecount[i] > 1) {
01915 pct = (double)typecount[i] / (double)types[i].kwords *
01916 (double)types[i].weight;
01917 pcts[i] = pct;
01918 pctsum += pct;
01919 if (pct > maxpct) {
01920 maxpct = pct;
01921 mostaccurate = i;
01922 }
01923 #ifdef DEBUG_MIMEMAGIC
01924 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01925 #endif
01926 }
01927 }
01928 if (mostaccurate >= 0) {
01929 if ( mostaccurate != P_JAVA || foundClass )
01930 {
01931 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01932 #ifdef DEBUG_MIMEMAGIC
01933 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << accuracy << endl;
01934 #endif
01935 conf->resultBuf = QString(types[mostaccurate].type);
01936 return 1;
01937 }
01938 }
01939
01940 switch (is_tar(buf, nbytes)) {
01941 case 1:
01942
01943 conf->resultBuf = MIME_APPL_TAR;
01944 conf->accuracy = 90;
01945 return 1;
01946 case 2:
01947
01948 conf->resultBuf = MIME_APPL_TAR;
01949 conf->accuracy = 90;
01950 return 1;
01951 }
01952
01953 for (i = 0; i < nbytes; i++) {
01954 if (!isascii(*(buf + i)))
01955 return 0;
01956 }
01957
01958
01959 conf->accuracy = 90;
01960 if (has_escapes) {
01961
01962
01963 conf->resultBuf = MIME_TEXT_UNKNOWN;
01964 } else {
01965
01966 conf->resultBuf = MIME_TEXT_PLAIN;
01967 }
01968 return 1;
01969 }
01970
01971
01972 #define TEXT_MAXLINELEN 300
01973
01974
01975
01976
01977
01978 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01979 {
01980 int i;
01981 unsigned char *cp;
01982
01983 nbytes--;
01984
01985
01986 for (i = 0, cp = buf; i < nbytes; i++, cp++)
01987 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01988 return 0;
01989
01990
01991
01992
01993 for (i = 0; i < nbytes;) {
01994 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
01995 if (cp == NULL) {
01996
01997 if (i + TEXT_MAXLINELEN >= nbytes)
01998 break;
01999 else
02000 return 0;
02001 }
02002 if (cp - buf > TEXT_MAXLINELEN)
02003 return 0;
02004 i += (cp - buf + 1);
02005 buf = cp + 1;
02006 }
02007 conf->resultBuf = MIME_TEXT_PLAIN;
02008 return 1;
02009 }
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
02026
02027
02028
02029
02030
02031
02032 static int
02033 is_tar(unsigned char *buf, int nbytes)
02034 {
02035 register union record *header = (union record *) buf;
02036 register int i;
02037 register long sum,
02038 recsum;
02039 register char *p;
02040
02041 if (nbytes < (int)sizeof(union record))
02042 return 0;
02043
02044 recsum = from_oct(8, header->header.chksum);
02045
02046 sum = 0;
02047 p = header->charptr;
02048 for (i = sizeof(union record); --i >= 0;) {
02049
02050
02051
02052
02053 sum += 0xFF & *p++;
02054 }
02055
02056
02057 for (i = sizeof(header->header.chksum); --i >= 0;)
02058 sum -= 0xFF & header->header.chksum[i];
02059 sum += ' ' * sizeof header->header.chksum;
02060
02061 if (sum != recsum)
02062 return 0;
02063
02064 if (0 == strcmp(header->header.magic, TMAGIC))
02065 return 2;
02066
02067 return 1;
02068 }
02069
02070
02071
02072
02073
02074
02075
/*
 * Parse an octal number from a fixed-width text field.
 *
 *   digs  - width of the field in characters
 *   where - start of the field
 *
 * Leading white space is skipped, then up to the remaining width of
 * octal digits is read.  Returns the value, or -1 when the field is all
 * white space or when the digits are followed, inside the field, by
 * something other than white space or '\0'.
 *
 * Characters are converted to unsigned char before they reach isspace();
 * passing a negative char value to the <ctype.h> functions is undefined.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    /* skip leading blanks; a field made only of blanks is invalid */
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;
    }

    /* accumulate octal digits, three bits each */
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    /* inside the field only white space or the terminator may follow */
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02097
02098 KMimeMagic::KMimeMagic()
02099 {
02100
02101 QString mimefile = locate( "mime", "magic" );
02102 init( mimefile );
02103
02104 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02105 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02106 if ( !mergeConfig( *it ) )
02107 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02108 }
02109
02110 KMimeMagic::KMimeMagic(const QString & _configfile)
02111 {
02112 init( _configfile );
02113 }
02114
02115 void KMimeMagic::init( const QString& _configfile )
02116 {
02117 int result;
02118 conf = new config_rec;
02119
02120
02121 conf->magic = conf->last = NULL;
02122 magicResult = NULL;
02123 conf->followLinks = false;
02124
02125 conf->utimeConf = 0L;
02126
02127 result = apprentice(_configfile);
02128 if (result == -1)
02129 return;
02130 #ifdef MIME_MAGIC_DEBUG_TABLE
02131 test_table();
02132 #endif
02133 }
02134
02135
02136
02137
02138
02139 KMimeMagic::~KMimeMagic()
02140 {
02141 if (conf) {
02142 struct magic *p = conf->magic;
02143 struct magic *q;
02144 while (p) {
02145 q = p;
02146 p = p->next;
02147 free(q);
02148 }
02149 delete conf->utimeConf;
02150 delete conf;
02151 }
02152 delete magicResult;
02153 }
02154
02155 bool
02156 KMimeMagic::mergeConfig(const QString & _configfile)
02157 {
02158 kdDebug(7018) << k_funcinfo << _configfile << endl;
02159 int result;
02160
02161 if (_configfile.isEmpty())
02162 return false;
02163 result = apprentice(_configfile);
02164 if (result == -1) {
02165 return false;
02166 }
02167 #ifdef MIME_MAGIC_DEBUG_TABLE
02168 test_table();
02169 #endif
02170 return true;
02171 }
02172
02173 bool
02174 KMimeMagic::mergeBufConfig(char * _configbuf)
02175 {
02176 int result;
02177
02178 if (conf) {
02179 result = buff_apprentice(_configbuf);
02180 if (result == -1)
02181 return false;
02182 #ifdef MIME_MAGIC_DEBUG_TABLE
02183 test_table();
02184 #endif
02185 return true;
02186 }
02187 return false;
02188 }
02189
02190 void
02191 KMimeMagic::setFollowLinks( bool _enable )
02192 {
02193 conf->followLinks = _enable;
02194 }
02195
02196 KMimeMagicResult *
02197 KMimeMagic::findBufferType(const QByteArray &array)
02198 {
02199 unsigned char buf[HOWMANY + 1];
02200
02201 conf->resultBuf = QString::null;
02202 if ( !magicResult )
02203 magicResult = new KMimeMagicResult();
02204 magicResult->setInvalid();
02205 conf->accuracy = 100;
02206
02207 int nbytes = array.size();
02208
02209 if (nbytes > HOWMANY)
02210 nbytes = HOWMANY;
02211 memcpy(buf, array.data(), nbytes);
02212 if (nbytes == 0) {
02213 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02214 } else {
02215 buf[nbytes++] = '\0';
02216 tryit(conf, buf, nbytes);
02217 }
02218
02219
02220 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02221 magicResult->setAccuracy(conf->accuracy);
02222 return magicResult;
02223 }
02224
02225 static void
02226 refineResult(KMimeMagicResult *r, const QString & _filename)
02227 {
02228 QString tmp = r->mimeType();
02229 if (tmp.isEmpty())
02230 return;
02231 if ( tmp == "text/x-c" ||
02232 tmp == "text/x-c++" )
02233 {
02234 if ( _filename.right(2) == ".h" )
02235 tmp += "hdr";
02236 else
02237 tmp += "src";
02238 r->setMimeType(tmp);
02239 }
02240 }
02241
02242 KMimeMagicResult *
02243 KMimeMagic::findBufferFileType( const QByteArray &data,
02244 const QString &fn)
02245 {
02246 KMimeMagicResult * r = findBufferType( data );
02247 refineResult(r, fn);
02248 return r;
02249 }
02250
02251
02252
02253
02254 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02255 {
02256 #ifdef DEBUG_MIMEMAGIC
02257 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02258 #endif
02259 conf->resultBuf = QString::null;
02260
02261 if ( !magicResult )
02262 magicResult = new KMimeMagicResult();
02263 magicResult->setInvalid();
02264 conf->accuracy = 100;
02265
02266 if ( !conf->utimeConf )
02267 conf->utimeConf = new KMimeMagicUtimeConf();
02268
02269
02270 process(conf, fn );
02271
02272
02273
02274 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02275 magicResult->setAccuracy(conf->accuracy);
02276 refineResult(magicResult, fn);
02277 return magicResult;
02278 }