kio Library API Documentation

kmimemagic.cpp

00001 /* This file is part of the KDE libraries
00002    Copyright (C) 2000 Fritz Elfert <fritz@kde.org>
00003    Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License version 2 as published by the Free Software Foundation.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017    Boston, MA 02111-1307, USA.
00018 */
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028 
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf,  const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034 
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037 
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040 
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043   if( !s_pSelf )
00044     initStatic();
00045   return s_pSelf;
00046 }
00047 
00048 void KMimeMagic::initStatic()
00049 {
00050   s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051   s_pSelf->setFollowLinks( true );
00052 }
00053 
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068 
00069 //#define MIME_MAGIC_DEBUG_TABLE // untested
00070 
00071 // Uncomment to debug the config-file parsing phase
00072 //#define DEBUG_APPRENTICE
00073 // Uncomment to debug the matching phase
00074 //#define DEBUG_MIMEMAGIC
00075 
00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00077 #define DEBUG_LINENUMBERS
00078 #endif
00079 
00080 /*
00081  * Builtin Mime types
00082  */
00083 #define MIME_BINARY_UNKNOWN    "application/octet-stream"
00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00085 #define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
00086 #define MIME_TEXT_UNKNOWN      "text/plain"
00087 #define MIME_TEXT_PLAIN        "text/plain"
00088 #define MIME_INODE_DIR         "inode/directory"
00089 #define MIME_INODE_CDEV        "inode/chardevice"
00090 #define MIME_INODE_BDEV        "inode/blockdevice"
00091 #define MIME_INODE_FIFO        "inode/fifo"
00092 #define MIME_INODE_LINK        "inode/link"
00093 #define MIME_INODE_SOCK        "inode/socket"
00094 // Following should go in magic-file - Fritz
00095 #define MIME_APPL_TROFF        "application/x-troff"
00096 #define MIME_APPL_TAR          "application/x-tar"
00097 #define MIME_TEXT_FORTRAN      "text/x-fortran"
00098 
#define MAXMIMESTRING        256

#define HOWMANY 4000            /* big enough to recognize most WWW files, and skip GPL-headers */
#define MAXDESC   50            /* max length of text description */
#define MAXstring 64            /* max length of "string" types */

/*
 * Storage for one comparison value: a number of one of the supported
 * widths, a string of at most MAXstring bytes, or the raw bytes of a
 * fixed-endian number.
 */
typedef union VALUETYPE {
    unsigned char b;        /* byte value */
    unsigned short h;       /* short value */
    unsigned long l;        /* long value */
    char s[MAXstring];      /* string value */
    unsigned char hs[2];    /* 2 bytes of a fixed-endian "short" */
    unsigned char hl[4];    /* 4 bytes of a fixed-endian "long" */
} VALUETYPE;
00113 
00114 struct magic {
00115     struct magic *next;     /* link to next entry */
00116 #ifdef DEBUG_LINENUMBERS
00117     int lineno;             /* line number from magic file - doesn't say from which one ;) */
00118 #endif
00119 
00120     short flag;
00121 #define INDIR    1              /* if '>(...)' appears,  */
00122 #define UNSIGNED 2              /* comparison is unsigned */
00123     short cont_level;       /* level of ">" */
00124     struct {
00125         char type;      /* byte short long */
00126         long offset;    /* offset from indirection */
00127     } in;
00128     long offset;            /* offset to magic number */
00129     unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
00130     char type;              /* int, short, long or string. */
00131     char vallen;            /* length of string value, if any */
00132 #define BYTE       1
00133 #define SHORT      2
00134 #define LONG       4
00135 #define STRING     5
00136 #define DATE       6
00137 #define BESHORT    7
00138 #define BELONG     8
00139 #define BEDATE     9
00140 #define LESHORT   10
00141 #define LELONG    11
00142 #define LEDATE    12
00143     VALUETYPE value;        /* either number or string */
00144     unsigned long mask;     /* mask before comparison with value */
00145     char nospflag;          /* suppress space character */
00146 
00147     /* NOTE: this string is suspected of overrunning - find it! */
00148     char desc[MAXDESC];     /* description */
00149 };
00150 
00151 /*
00152  * data structures for tar file recognition
00153  * --------------------------------------------------------------------------
00154  * Header file for public domain tar (tape archive) program.
00155  *
00156  * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
00157  * Gilmore, ihnp4!hoptoad!gnu.
00158  *
00159  * Header block on tape.
00160  *
00161  * I'm going to use traditional DP naming conventions here. A "block" is a big
00162  * chunk of stuff that we do I/O on. A "record" is a piece of info that we
00163  * care about. Typically many "record"s fit into a "block".
00164  */
#define RECORDSIZE    512   /* size of one tar record */
#define NAMSIZ    100       /* size of the name and linkname fields */
#define TUNMLEN    32       /* size of the uname field */
#define TGNMLEN    32       /* size of the gname field */

/*
 * One tar record, viewable either as raw bytes or as a header block.
 */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};
00189 
00190 /* The magic field is filled with this if uname and gname are valid. */
00191 #define    TMAGIC        "ustar  "  /* 7 chars and a null */
00192 
00193 /*
00194  * file-function prototypes
00195  */
static int is_tar(unsigned char *, int);
static unsigned long signextend(struct magic *m, unsigned long v);
static int getvalue(struct magic *m, char **p);
static int hextoint(int);
static char *getstr(char *s, char *p, int plen, int *slen);
static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
static int mcheck(union VALUETYPE *, struct magic *);
static int mconvert(union VALUETYPE *, struct magic *);
static long from_oct(int, char *);
00205 
00206 /*
00207  * includes for ASCII substring recognition formerly "names.h" in file
00208  * command
00209  *
00210  * Original notes: names and types used by ascmagic in file(1).
00211  * These tokens are
00212  * here because they can appear anywhere in the first HOWMANY bytes, while
00213  * tokens in /etc/magic must appear at fixed offsets into the file. Don't
00214  * make HOWMANY too high unless you have a very fast CPU.
00215  */
00216 
00217 /* these types are used to calculate the index into 'types': keep them in sync! */
00218 /* HTML inserted in first because this is a web server module now */
00219 /* ENG removed because stupid */
00220 #define L_HTML   0x001          /* HTML */
00221 #define L_C      0x002          /* first and foremost on UNIX */
00222 #define L_MAKE   0x004          /* Makefiles */
00223 #define L_PLI    0x008          /* PL/1 */
00224 #define L_MACH   0x010          /* some kinda assembler */
00225 #define L_PAS    0x020          /* Pascal */
00226 #define L_JAVA   0x040          /* Java source */
00227 #define L_CPP    0x080          /* C++ */
00228 #define L_MAIL   0x100          /* Electronic mail */
00229 #define L_NEWS   0x200          /* Usenet Netnews */
00230 #define L_DIFF   0x400          /* Output of diff */
00231 #define L_OBJC   0x800          /* Objective C */
00232 
00233 #define P_HTML   0          /* HTML */
00234 #define P_C      1          /* first and foremost on UNIX */
00235 #define P_MAKE   2          /* Makefiles */
00236 #define P_PLI    3          /* PL/1 */
00237 #define P_MACH   4          /* some kinda assembler */
00238 #define P_PAS    5          /* Pascal */
00239 #define P_JAVA   6          /* Java source */
00240 #define P_CPP    7          /* C++ */
00241 #define P_MAIL   8          /* Electronic mail */
00242 #define P_NEWS   9          /* Usenet Netnews */
00243 #define P_DIFF  10          /* Output of diff */
00244 #define P_OBJC  11          /* Objective C */
00245 
/* One row of the text-type table below. */
typedef struct asc_type {
    const char *type;   /* mime type name */
    int  kwords;
    double  weight;
} asc_type;

/* Rows are in P_* order (P_HTML first, P_OBJC last). */
static const asc_type types[] = {
    /* P_HTML */ { "text/html",        19, 2.0 }, // 10 items but 10 different words only
    /* P_C    */ { "text/x-c",         13, 1.0 },
    /* P_MAKE */ { "text/x-makefile",   4, 1.9 },
    /* P_PLI  */ { "text/x-pli",        1, 3.0 },
    /* P_MACH */ { "text/x-assembler",  6, 2.1 },
    /* P_PAS  */ { "text/x-pascal",     1, 1.0 },
    /* P_JAVA */ { "text/x-java",      12, 1.0 },
    /* P_CPP  */ { "text/x-c++",       19, 1.0 },
    /* P_MAIL */ { "message/rfc822",    4, 1.9 },
    /* P_NEWS */ { "message/news",      3, 2.0 },
    /* P_DIFF */ { "text/x-diff",       4, 2.0 },
    /* P_OBJC */ { "text/x-objc",      10, 1.0 }
};

/* Number of rows in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00268 
00269 static struct names {
00270     const char *name;
00271     short type;
00272 } const names[] = {
00273     {
00274         "<html", L_HTML
00275     },
00276     {
00277         "<HTML", L_HTML
00278     },
00279     {
00280         "<head", L_HTML
00281     },
00282     {
00283         "<HEAD", L_HTML
00284     },
00285     {
00286         "<body", L_HTML
00287     },
00288     {
00289         "<BODY", L_HTML
00290     },
00291     {
00292         "<title", L_HTML
00293     },
00294     {
00295         "<TITLE", L_HTML
00296     },
00297     {
00298         "<h1", L_HTML
00299     },
00300     {
00301         "<H1", L_HTML
00302     },
00303     {
00304         "<a", L_HTML
00305     },
00306     {
00307         "<A", L_HTML
00308     },
00309     {
00310         "<img", L_HTML
00311     },
00312     {
00313         "<IMG", L_HTML
00314     },
00315     {
00316         "<!--", L_HTML
00317     },
00318     {
00319         "<!doctype", L_HTML
00320     },
00321     {
00322         "<!DOCTYPE", L_HTML
00323     },
00324     {
00325         "<div", L_HTML
00326     },
00327     {
00328         "<DIV", L_HTML
00329     },
00330     {
00331         "<frame", L_HTML
00332     },
00333     {
00334         "<FRAME", L_HTML
00335     },
00336     {
00337         "<frameset", L_HTML
00338     },
00339     {
00340         "<FRAMESET", L_HTML
00341     },
00342         {
00343                 "<script", L_HTML
00344         },
00345         {
00346                 "<SCRIPT", L_HTML
00347         },
00348     {
00349         "/*", L_C|L_CPP|L_JAVA|L_OBJC
00350     },
00351     {
00352         "//", L_C|L_CPP|L_JAVA|L_OBJC
00353     },
00354     {
00355         "#include", L_C|L_CPP
00356     },
00357     {
00358         "#ifdef", L_C|L_CPP
00359     },
00360     {
00361         "#ifndef", L_C|L_CPP
00362     },
00363     {
00364         "bool", L_C|L_CPP
00365     },
00366     {
00367         "char", L_C|L_CPP|L_JAVA|L_OBJC
00368     },
00369     {
00370         "int", L_C|L_CPP|L_JAVA|L_OBJC
00371     },
00372     {
00373         "float", L_C|L_CPP|L_JAVA|L_OBJC
00374     },
00375     {
00376         "void", L_C|L_CPP|L_JAVA|L_OBJC
00377     },
00378     {
00379         "extern", L_C|L_CPP
00380     },
00381     {
00382         "struct", L_C|L_CPP
00383     },
00384     {
00385         "union", L_C|L_CPP
00386     },
00387     {
00388         "implements", L_JAVA
00389     },
00390     {
00391         "super", L_JAVA
00392     },
00393     {
00394         "import", L_JAVA
00395     },
00396     {
00397         "class", L_CPP|L_JAVA
00398     },
00399     {
00400         "public", L_CPP|L_JAVA
00401     },
00402     {
00403         "private", L_CPP|L_JAVA
00404     },
00405     {
00406         "explicit", L_CPP
00407     },
00408     {
00409         "virtual", L_CPP
00410     },
00411     {
00412         "namespace", L_CPP
00413     },
00414     {
00415         "#import", L_OBJC
00416     },
00417     {
00418         "@interface", L_OBJC
00419     },
00420     {
00421         "@implementation", L_OBJC
00422     },
00423     {
00424         "@protocol", L_OBJC
00425     },
00426     {
00427         "CFLAGS", L_MAKE
00428     },
00429     {
00430         "LDFLAGS", L_MAKE
00431     },
00432     {
00433         "all:", L_MAKE
00434     },
00435     {
00436         ".PHONY:", L_MAKE
00437     },
00438     {
00439         "srcdir", L_MAKE
00440     },
00441     {
00442         "exec_prefix", L_MAKE
00443     },
00444     /*
00445      * Too many files of text have these words in them.  Find another way
00446      * to recognize Fortrash.
00447      */
00448     {
00449         ".ascii", L_MACH
00450     },
00451     {
00452         ".asciiz", L_MACH
00453     },
00454     {
00455         ".byte", L_MACH
00456     },
00457     {
00458         ".even", L_MACH
00459     },
00460     {
00461         ".globl", L_MACH
00462     },
00463     {
00464         "clr", L_MACH
00465     },
00466     {
00467         "(input", L_PAS
00468     },
00469     {
00470         "dcl", L_PLI
00471     },
00472     {
00473         "Received:", L_MAIL
00474     },
00475     /* we now stop at '>' for tokens, so this one won't work {
00476         ">From", L_MAIL
00477         },*/
00478     {
00479         "Return-Path:", L_MAIL
00480     },
00481     {
00482         "Cc:", L_MAIL
00483     },
00484     {
00485         "Newsgroups:", L_NEWS
00486     },
00487     {
00488         "Path:", L_NEWS
00489     },
00490     {
00491         "Organization:", L_NEWS
00492     },
00493     {
00494         "---", L_DIFF
00495     },
00496     {
00497         "+++", L_DIFF
00498     },
00499     {
00500         "***", L_DIFF
00501     },
00502     {
00503         "@@", L_DIFF
00504     },
00505     {
00506         NULL, 0
00507     }
00508 };
00509 
00520 class KMimeMagicUtimeConf
00521 {
00522 public:
00523     KMimeMagicUtimeConf()
00524     {
00525         tmpDirs << QString::fromLatin1("/tmp"); // default value
00526 
00527         // The trick is that we also don't want the user to override globally set
00528         // directories. So we have to misuse KStandardDirs :}
00529         QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00530         if ( !confDirs.isEmpty() )
00531         {
00532             QString globalConf = confDirs.last() + "kmimemagicrc";
00533             if ( QFile::exists( globalConf ) )
00534             {
00535                 KSimpleConfig cfg( globalConf );
00536                 cfg.setGroup( "Settings" );
00537                 tmpDirs = cfg.readListEntry( "atimeDirs" );
00538             }
00539             if ( confDirs.count() > 1 )
00540             {
00541                 QString localConf = confDirs.first() + "kmimemagicrc";
00542                 if ( QFile::exists( localConf ) )
00543                 {
00544                     KSimpleConfig cfg( localConf );
00545                     cfg.setGroup( "Settings" );
00546                     tmpDirs += cfg.readListEntry( "atimeDirs" );
00547                 }
00548             }
00549             for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550             {
00551                 QString dir = *it;
00552                 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00553                     (*it) += '/';
00554             }
00555         }
00556 #if 0
00557         // debug code
00558         for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559             kdDebug(7018) << " atimeDir: " << *it << endl;
00560 #endif
00561     }
00562 
00563     bool restoreAccessTime( const QString & file ) const
00564     {
00565         QString dir = file.left( file.findRev( '/' ) );
00566         bool res = tmpDirs.contains( dir );
00567         //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl;
00568         return res;
00569     }
00570     QStringList tmpDirs;
00571 };
00572 
00573 /* current config */
00574 struct config_rec {
00575     bool followLinks;
00576     QString resultBuf;
00577     int accuracy;
00578 
00579     struct magic *magic,    /* head of magic config list */
00580     *last;
00581     KMimeMagicUtimeConf * utimeConf;
00582 };
00583 
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid (only built with MIME_MAGIC_DEBUG_TABLE): walks the magic list
 * and reports the first entry whose pointer value consists of four
 * printable bytes, which suggests that text overwrote the `next` link.
 *
 * NOTE(review): `conf` is not declared at this scope (this is a free
 * function, not a KMimeMagic member), so this still needs the
 * configuration passed in before it can be enabled - confirm with the
 * callers in apprentice() / buff_apprentice().
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        const int b3 = (int) ((addr >> 24) & 255);
        const int b2 = (int) ((addr >> 16) & 255);
        const int b1 = (int) ((addr >> 8) & 255);
        const int b0 = (int) (addr & 255);

        if (isprint(b3) && isprint(b2) && isprint(b1) && isprint(b0)) {
            /* lineno only exists in struct magic with DEBUG_LINENUMBERS */
            int prevLine = -1;
#ifdef DEBUG_LINENUMBERS
            if (prevm)
                prevLine = prevm->lineno;
#endif
            kdError(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) b3 << (char) b2 << (char) b1 << (char) b0
                          << "\" line=" << prevLine << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00610 
00611 #define    EATAB {while (isascii((unsigned char) *l) && \
00612           isspace((unsigned char) *l))  ++l;}
00613 
00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00615 {
00616     int ws_offset;
00617 
00618     /* delete newline */
00619     if (line[0]) {
00620         line[strlen(line) - 1] = '\0';
00621     }
00622     /* skip leading whitespace */
00623     ws_offset = 0;
00624     while (line[ws_offset] && isspace(line[ws_offset])) {
00625         ws_offset++;
00626     }
00627 
00628     /* skip blank lines */
00629     if (line[ws_offset] == 0) {
00630         return 0;
00631     }
00632     /* comment, do not parse */
00633     if (line[ws_offset] == '#')
00634         return 0;
00635 
00636     /* if we get here, we're going to use it so count it */
00637     (*rule)++;
00638 
00639     /* parse it */
00640     return (parse(line + ws_offset, lineno) != 0);
00641 }
00642 
00643 /*
00644  * apprentice - load configuration from the magic file.
00645  */
00646 int KMimeMagic::apprentice( const QString& magicfile )
00647 {
00648     FILE *f;
00649     char line[BUFSIZ + 1];
00650     int errs = 0;
00651     int lineno;
00652     int rule = 0;
00653     QCString fname;
00654 
00655     if (magicfile.isEmpty())
00656         return -1;
00657     fname = QFile::encodeName(magicfile);
00658     f = fopen(fname, "r");
00659     if (f == NULL) {
00660         kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00661         return -1;
00662     }
00663 
00664     /* parse it */
00665     for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666         if (parse_line(line, &rule, lineno))
00667             errs++;
00668 
00669     fclose(f);
00670 
00671 #ifdef DEBUG_APPRENTICE
00672     kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00673     kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00674 #endif
00675 
00676 #ifdef MIME_MAGIC_DEBUG_TABLE
00677     test_table();
00678 #endif
00679 
00680     return (errs ? -1 : 0);
00681 }
00682 
00683 int KMimeMagic::buff_apprentice(char *buff)
00684 {
00685     char line[BUFSIZ + 2];
00686     int errs = 0;
00687     int lineno = 1;
00688     char *start = buff;
00689     char *end;
00690     int count = 0;
00691     int rule = 0;
00692     int len = strlen(buff) + 1;
00693 
00694     /* parse it */
00695     do {
00696         count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697         strncpy(line, start, count);
00698         line[count] = '\0';
00699         if ((end = strchr(line, '\n'))) {
00700             *(++end) = '\0';
00701             count = strlen(line);
00702         } else
00703           strcat(line, "\n");
00704         start += count;
00705         len -= count;
00706         if (parse_line(line, &rule, lineno))
00707             errs++;
00708         lineno++;
00709     } while (len > 0);
00710 
00711 #ifdef DEBUG_APPRENTICE
00712     kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00713     kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00714 #endif
00715 
00716 #ifdef MIME_MAGIC_DEBUG_TABLE
00717     test_table();
00718 #endif
00719 
00720     return (errs ? -1 : 0);
00721 }
00722 
00723 /*
00724  * extend the sign bit if the comparison is to be signed
00725  */
00726 static unsigned long
00727 signextend(struct magic *m, unsigned long v)
00728 {
00729     if (!(m->flag & UNSIGNED))
00730         switch (m->type) {
00731                 /*
00732                  * Do not remove the casts below.  They are vital.
00733                  * When later compared with the data, the sign
00734                  * extension must have happened.
00735                  */
00736             case BYTE:
00737                 v = (char) v;
00738                 break;
00739             case SHORT:
00740             case BESHORT:
00741             case LESHORT:
00742                 v = (short) v;
00743                 break;
00744             case DATE:
00745             case BEDATE:
00746             case LEDATE:
00747             case LONG:
00748             case BELONG:
00749             case LELONG:
00750                 v = (long) v;
00751                 break;
00752             case STRING:
00753                 break;
00754             default:
00755                 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00756                 return 998; //good value
00757         }
00758     return v;
00759 }
00760 
00761 /*
00762  * parse one line from magic file, put into magic[index++] if valid
00763  */
00764 int KMimeMagic::parse(char *l, int
00765 #ifdef DEBUG_LINENUMBERS
00766     lineno
00767 #endif
00768         )
00769 {
00770     int i = 0;
00771     struct magic *m;
00772     char *t,
00773     *s;
00774     /* allocate magic structure entry */
00775     if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00776         kdError(7018) << "parse: Out of memory." << endl;
00777         return -1;
00778     }
00779     /* append to linked list */
00780     m->next = NULL;
00781     if (!conf->magic || !conf->last) {
00782         conf->magic = conf->last = m;
00783     } else {
00784         conf->last->next = m;
00785         conf->last = m;
00786     }
00787 
00788     /* set values in magic structure */
00789     m->flag = 0;
00790     m->cont_level = 0;
00791 #ifdef DEBUG_LINENUMBERS
00792     m->lineno = lineno;
00793 #endif
00794 
00795     while (*l == '>') {
00796         ++l;            /* step over */
00797         m->cont_level++;
00798     }
00799 
00800     if (m->cont_level != 0 && *l == '(') {
00801         ++l;            /* step over */
00802         m->flag |= INDIR;
00803     }
00804     /* get offset, then skip over it */
00805     m->offset = (int) strtol(l, &t, 0);
00806     if (l == t) {
00807             kdError(7018) << "parse: offset " << l << " invalid" << endl;
00808     }
00809     l = t;
00810 
00811     if (m->flag & INDIR) {
00812         m->in.type = LONG;
00813         m->in.offset = 0;
00814         /*
00815          * read [.lbs][+-]nnnnn)
00816          */
00817         if (*l == '.') {
00818             switch (*++l) {
00819                 case 'l':
00820                     m->in.type = LONG;
00821                     break;
00822                 case 's':
00823                     m->in.type = SHORT;
00824                     break;
00825                 case 'b':
00826                     m->in.type = BYTE;
00827                     break;
00828                 default:
00829                     kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00830                     break;
00831             }
00832             l++;
00833         }
00834         s = l;
00835         if (*l == '+' || *l == '-')
00836             l++;
00837         if (isdigit((unsigned char) *l)) {
00838             m->in.offset = strtol(l, &t, 0);
00839             if (*s == '-')
00840                 m->in.offset = -m->in.offset;
00841         } else
00842             t = l;
00843         if (*t++ != ')') {
00844             kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00845         }
00846         l = t;
00847     }
00848     while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00849         ++l;
00850     EATAB;
00851 
00852 #define NBYTE       4
00853 #define NSHORT      5
00854 #define NLONG       4
00855 #define NSTRING     6
00856 #define NDATE       4
00857 #define NBESHORT    7
00858 #define NBELONG     6
00859 #define NBEDATE     6
00860 #define NLESHORT    7
00861 #define NLELONG     6
00862 #define NLEDATE     6
00863 
00864     if (*l == 'u') {
00865         ++l;
00866         m->flag |= UNSIGNED;
00867     }
00868     /* get type, skip it */
00869     if (strncmp(l, "byte", NBYTE) == 0) {
00870         m->type = BYTE;
00871         l += NBYTE;
00872     } else if (strncmp(l, "short", NSHORT) == 0) {
00873         m->type = SHORT;
00874         l += NSHORT;
00875     } else if (strncmp(l, "long", NLONG) == 0) {
00876         m->type = LONG;
00877         l += NLONG;
00878     } else if (strncmp(l, "string", NSTRING) == 0) {
00879         m->type = STRING;
00880         l += NSTRING;
00881     } else if (strncmp(l, "date", NDATE) == 0) {
00882         m->type = DATE;
00883         l += NDATE;
00884     } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00885         m->type = BESHORT;
00886         l += NBESHORT;
00887     } else if (strncmp(l, "belong", NBELONG) == 0) {
00888         m->type = BELONG;
00889         l += NBELONG;
00890     } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00891         m->type = BEDATE;
00892         l += NBEDATE;
00893     } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00894         m->type = LESHORT;
00895         l += NLESHORT;
00896     } else if (strncmp(l, "lelong", NLELONG) == 0) {
00897         m->type = LELONG;
00898         l += NLELONG;
00899     } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00900         m->type = LEDATE;
00901         l += NLEDATE;
00902     } else {
00903         kdError(7018) << "parse: type " << l << " invalid" << endl;
00904         return -1;
00905     }
00906     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
00907     if (*l == '&') {
00908         ++l;
00909         m->mask = signextend(m, strtol(l, &l, 0));
00910     } else
00911         m->mask = (unsigned long) ~0L;
00912     EATAB;
00913 
00914     switch (*l) {
00915         case '>':
00916         case '<':
00917             /* Old-style anding: "0 byte &0x80 dynamically linked" */
00918         case '&':
00919         case '^':
00920         case '=':
00921             m->reln = *l;
00922             ++l;
00923             break;
00924         case '!':
00925             if (m->type != STRING) {
00926                 m->reln = *l;
00927                 ++l;
00928                 break;
00929             }
00930             /* FALL THROUGH */
00931         default:
00932             if (*l == 'x' && isascii((unsigned char) l[1]) &&
00933                 isspace((unsigned char) l[1])) {
00934                 m->reln = *l;
00935                 ++l;
00936                 goto GetDesc;   /* Bill The Cat */
00937             }
00938             m->reln = '=';
00939             break;
00940     }
00941     EATAB;
00942 
00943     if (getvalue(m, &l))
00944         return -1;
00945     /*
00946      * now get last part - the description
00947      */
00948       GetDesc:
00949     EATAB;
00950     if (l[0] == '\b') {
00951         ++l;
00952         m->nospflag = 1;
00953     } else if ((l[0] == '\\') && (l[1] == 'b')) {
00954         ++l;
00955         ++l;
00956         m->nospflag = 1;
00957     } else
00958         m->nospflag = 0;
00959         // Copy description - until EOL or '#' (for comments)
00960         while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00961             m->desc[i++] = *l++;
00962         m->desc[i] = '\0';
00963         // Remove trailing spaces
00964         while (--i>0 && isspace( m->desc[i] ))
00965             m->desc[i] = '\0';
00966 
00967         // old code
00968     //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ;
00969 
00970 #ifdef DEBUG_APPRENTICE
00971     kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00972 #endif
00973     return 0;
00974 }
00975 
00976 /*
00977  * Read a numeric value from a pointer, into the value union of a magic
00978  * pointer, according to the magic type.  Update the string pointer to point
00979  * just after the number read.  Return 0 for success, non-zero for failure.
00980  */
00981 static int
00982 getvalue(struct magic *m, char **p)
00983 {
00984     int slen;
00985 
00986     if (m->type == STRING) {
00987         *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00988         m->vallen = slen;
00989     } else if (m->reln != 'x')
00990         m->value.l = signextend(m, strtol(*p, p, 0));
00991     return 0;
00992 }
00993 
00994 /*
00995  * Convert a string containing C character escapes.  Stop at an unescaped
00996  * space or tab. Copy the converted version to "p", returning its length in
00997  * *slen. Return updated scan pointer as function result.
00998  */
00999 static char *
01000 getstr(register char *s, register char *p, int plen, int *slen)
01001 {
01002     char *origs = s,
01003     *origp = p;
01004     char *pmax = p + plen - 1;
01005     register int c;
01006     register int val;
01007 
01008     while ((c = *s++) != '\0') {
01009         if (isspace((unsigned char) c))
01010             break;
01011         if (p >= pmax) {
01012             kdError(7018) << "String too long: " << origs << endl;
01013             break;
01014         }
01015         if (c == '\\') {
01016             switch (c = *s++) {
01017 
01018                 case '\0':
01019                     goto out;
01020 
01021                 default:
01022                     *p++ = (char) c;
01023                     break;
01024 
01025                 case 'n':
01026                     *p++ = '\n';
01027                     break;
01028 
01029                 case 'r':
01030                     *p++ = '\r';
01031                     break;
01032 
01033                 case 'b':
01034                     *p++ = '\b';
01035                     break;
01036 
01037                 case 't':
01038                     *p++ = '\t';
01039                     break;
01040 
01041                 case 'f':
01042                     *p++ = '\f';
01043                     break;
01044 
01045                 case 'v':
01046                     *p++ = '\v';
01047                     break;
01048 
01049                     /* \ and up to 3 octal digits */
01050                 case '0':
01051                 case '1':
01052                 case '2':
01053                 case '3':
01054                 case '4':
01055                 case '5':
01056                 case '6':
01057                 case '7':
01058                     val = c - '0';
01059                     c = *s++;   /* try for 2 */
01060                     if (c >= '0' && c <= '7') {
01061                         val = (val << 3) | (c - '0');
01062                         c = *s++;   /* try for 3 */
01063                         if (c >= '0' && c <= '7')
01064                             val = (val << 3) | (c - '0');
01065                         else
01066                             --s;
01067                     } else
01068                         --s;
01069                     *p++ = (char) val;
01070                     break;
01071 
01072                     /* \x and up to 3 hex digits */
01073                 case 'x':
01074                     val = 'x';  /* Default if no digits */
01075                     c = hextoint(*s++); /* Get next char */
01076                     if (c >= 0) {
01077                         val = c;
01078                         c = hextoint(*s++);
01079                         if (c >= 0) {
01080                             val = (val << 4) + c;
01081                             c = hextoint(*s++);
01082                             if (c >= 0) {
01083                                 val = (val << 4) + c;
01084                             } else
01085                                 --s;
01086                         } else
01087                             --s;
01088                     } else
01089                         --s;
01090                     *p++ = (char) val;
01091                     break;
01092             }
01093         } else
01094             *p++ = (char) c;
01095     }
01096       out:
01097     *p = '\0';
01098     *slen = p - origp;
01099     //for ( char* foo = origp; foo < p ; ++foo )
01100     //  kdDebug(7018) << "  " << *foo << endl;
01101     return s;
01102 }
01103 
01104 
/*
 * Map one hexadecimal digit character to its numeric value (0..15).
 * Returns -1 when c is not a hex digit.
 */
static int
hextoint(int c)
{
    const unsigned char low = (unsigned char) c;
    int digit = -1;

    if (isascii(low)) {
        if (isdigit(low))
            digit = c - '0';
        else if (c >= 'a' && c <= 'f')
            digit = 10 + (c - 'a');
        else if (c >= 'A' && c <= 'F')
            digit = 10 + (c - 'A');
    }
    return digit;
}
01119 
01120 /*
01121  * Convert the byte order of the data we are looking at
01122  */
01123 static int
01124 mconvert(union VALUETYPE *p, struct magic *m)
01125 {
01126     switch (m->type) {
01127         case BYTE:
01128             return 1;
01129         case STRING:
01130             /* Null terminate */
01131             p->s[sizeof(p->s) - 1] = '\0';
01132             return 1;
01133 #ifndef WORDS_BIGENDIAN
01134         case SHORT:
01135 #endif
01136         case BESHORT:
01137             p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01138             return 1;
01139 #ifndef WORDS_BIGENDIAN
01140         case LONG:
01141         case DATE:
01142 #endif
01143         case BELONG:
01144         case BEDATE:
01145             p->l = (long)
01146                 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147             return 1;
01148 #ifdef WORDS_BIGENDIAN
01149         case SHORT:
01150 #endif
01151         case LESHORT:
01152             p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01153             return 1;
01154 #ifdef WORDS_BIGENDIAN
01155         case LONG:
01156         case DATE:
01157 #endif
01158         case LELONG:
01159         case LEDATE:
01160             p->l = (long)
01161                 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162             return 1;
01163         default:
01164             kdError(7018) << "mconvert: invalid type " << m->type << endl;
01165             return 0;
01166     }
01167 }
01168 
01169 
01170 static int
01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01172      int nbytes)
01173 {
01174     long offset = m->offset;
01175         switch ( m->type )
01176     {
01177         case BYTE:
01178         if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1
01179             return 0;
01180         break;
01181         case SHORT:
01182         case BESHORT:
01183         case LESHORT:
01184             if ( offset + 2 > nbytes-1 )
01185             return 0;
01186         break;
01187         case LONG:
01188         case BELONG:
01189         case LELONG:
01190         case DATE:
01191         case BEDATE:
01192         case LEDATE:
01193             if ( offset + 4 > nbytes-1 )
01194             return 0;
01195         break;
01196         case STRING:
01197         break;
01198     }
01199 
01200 // The file length might be < sizeof(union VALUETYPE) (David)
01201 // -> pad with zeros (the 'file' command does it this way)
01202 // Thanks to Stan Covington <stan@calderasystems.com> for detailed report
01203     if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01204     {
01205       int have = nbytes - offset;
01206       memset(p, 0, sizeof(union VALUETYPE));
01207       if (have > 0)
01208         memcpy(p, s + offset, have);
01209     } else
01210       memcpy(p, s + offset, sizeof(union VALUETYPE));
01211 
01212     if (!mconvert(p, m))
01213         return 0;
01214 
01215     if (m->flag & INDIR) {
01216 
01217         switch (m->in.type) {
01218             case BYTE:
01219                 offset = p->b + m->in.offset;
01220                 break;
01221             case SHORT:
01222                 offset = p->h + m->in.offset;
01223                 break;
01224             case LONG:
01225                 offset = p->l + m->in.offset;
01226                 break;
01227         }
01228 
01229         if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01230              return 0;
01231 
01232         memcpy(p, s + offset, sizeof(union VALUETYPE));
01233 
01234         if (!mconvert(p, m))
01235             return 0;
01236     }
01237     return 1;
01238 }
01239 
01240 static int
01241 mcheck(union VALUETYPE *p, struct magic *m)
01242 {
01243     register unsigned long l = m->value.l;
01244     register unsigned long v;
01245     int matched;
01246 
01247     if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01248         kdError(7018) << "BOINK" << endl;
01249         return 1;
01250     }
01251     switch (m->type) {
01252         case BYTE:
01253             v = p->b;
01254             break;
01255 
01256         case SHORT:
01257         case BESHORT:
01258         case LESHORT:
01259             v = p->h;
01260             break;
01261 
01262         case LONG:
01263         case BELONG:
01264         case LELONG:
01265         case DATE:
01266         case BEDATE:
01267         case LEDATE:
01268             v = p->l;
01269             break;
01270 
01271         case STRING:
01272             l = 0;
01273             /*
01274              * What we want here is: v = strncmp(m->value.s, p->s,
01275              * m->vallen); but ignoring any nulls.  bcmp doesn't give
01276              * -/+/0 and isn't universally available anyway.
01277              */
01278             v = 0;
01279             {
01280                 register unsigned char *a = (unsigned char *) m->value.s;
01281                 register unsigned char *b = (unsigned char *) p->s;
01282                 register int len = m->vallen;
01283                 Q_ASSERT(len);
01284 
01285                 while (--len >= 0)
01286                     if ((v = *b++ - *a++) != 0)
01287                         break;
01288             }
01289             break;
01290         default:
01291             kdError(7018) << "mcheck: invalid type " << m->type << endl;
01292             return 0;   /* NOTREACHED */
01293     }
01294 #if 0
01295     qDebug("Before signextend %08x", v);
01296 #endif
01297     v = signextend(m, v) & m->mask;
01298 #if 0
01299     qDebug("After signextend %08x", v);
01300 #endif
01301 
01302     switch (m->reln) {
01303         case 'x':
01304             matched = 1;
01305             break;
01306 
01307         case '!':
01308             matched = v != l;
01309             break;
01310 
01311         case '=':
01312             matched = v == l;
01313             break;
01314 
01315         case '>':
01316             if (m->flag & UNSIGNED)
01317                 matched = v > l;
01318             else
01319                 matched = (long) v > (long) l;
01320             break;
01321 
01322         case '<':
01323             if (m->flag & UNSIGNED)
01324                 matched = v < l;
01325             else
01326                 matched = (long) v < (long) l;
01327             break;
01328 
01329         case '&':
01330             matched = (v & l) == l;
01331             break;
01332 
01333         case '^':
01334             matched = (v & l) != l;
01335             break;
01336 
01337         default:
01338             matched = 0;
01339             kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01340             break;  /* NOTREACHED */
01341     }
01342 
01343     return matched;
01344 }
01345 
01346 /*
01347  * magic_process - process input file fn. Opens the file and reads a
01348  * fixed-size buffer to begin processing the contents.
01349  */
01350 
01351 void process(struct config_rec* conf, const QString & fn)
01352 {
01353     int fd = 0;
01354     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
01355     KDE_struct_stat sb;
01356     int nbytes = 0;         /* number of bytes read from a datafile */
01357         int tagbytes = 0;       /* size of prefixed tag */
01358         QCString fileName = QFile::encodeName( fn );
01359 
01360     /*
01361      * first try judging the file based on its filesystem status
01362      */
01363     if (fsmagic(conf, fileName, &sb) != 0) {
01364         //resultBuf += "\n";
01365         return;
01366     }
01367     if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368         /* We can't open it, but we were able to stat it. */
01369         /*
01370          * if (sb.st_mode & 0002) addResult("writable, ");
01371          * if (sb.st_mode & 0111) addResult("executable, ");
01372          */
01373         //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl;
01374         conf->resultBuf = MIME_BINARY_UNREADABLE;
01375         return;
01376     }
01377     /*
01378      * try looking at the first HOWMANY bytes
01379      */
01380     if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01381         kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01382         conf->resultBuf = MIME_BINARY_UNREADABLE;
01383         return;
01384     }
01385         if ((tagbytes = tagmagic(buf, nbytes))) {
01386         // Read buffer at new position
01387         lseek(fd, tagbytes, SEEK_SET);
01388         nbytes = read(fd, (char*)buf, HOWMANY);
01389         if (nbytes < 0) {
01390             conf->resultBuf = MIME_BINARY_UNREADABLE;
01391             return;
01392         }
01393         }
01394     if (nbytes == 0) {
01395         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01396     } else {
01397         buf[nbytes++] = '\0';   /* null-terminate it */
01398         tryit(conf, buf, nbytes);
01399     }
01400 
01401         if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01402         {
01403             /*
01404              * Try to restore access, modification times if read it.
01405              * This changes the "change" time (ctime), but we can't do anything
01406              * about that.
01407              */
01408             struct utimbuf utbuf;
01409             utbuf.actime = sb.st_atime;
01410             utbuf.modtime = sb.st_mtime;
01411             (void) utime(fileName, &utbuf);
01412         }
01413     (void) close(fd);
01414 }
01415 
01416 
01417 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01418 {
01419     /* try tests in /etc/magic (or surrogate magic file) */
01420     if (match(conf, buf, nb))
01421         return;
01422 
01423     /* try known keywords, check for ascii-ness too. */
01424     if (ascmagic(conf, buf, nb) == 1)
01425         return;
01426 
01427         /* see if it's plain text */
01428         if (textmagic(conf, buf, nb))
01429                 return;
01430 
01431     /* abandon hope, all ye who remain here */
01432     conf->resultBuf = MIME_BINARY_UNKNOWN;
01433     conf->accuracy = 0;
01434 }
01435 
01436 static int
01437 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01438 {
01439     int ret = 0;
01440 
01441     /*
01442      * Fstat is cheaper but fails for files you don't have read perms on.
01443      * On 4.2BSD and similar systems, use lstat() to identify symlinks.
01444      */
01445     ret = KDE_lstat(fn, sb);  /* don't merge into if; see "ret =" above */
01446 
01447     if (ret) {
01448         return 1;
01449 
01450     }
01451     /*
01452      * if (sb->st_mode & S_ISUID) resultBuf += "setuid ";
01453      * if (sb->st_mode & S_ISGID) resultBuf += "setgid ";
01454      * if (sb->st_mode & S_ISVTX) resultBuf += "sticky ";
01455      */
01456 
01457     switch (sb->st_mode & S_IFMT) {
01458     case S_IFDIR:
01459         conf->resultBuf = MIME_INODE_DIR;
01460         return 1;
01461     case S_IFCHR:
01462         conf->resultBuf = MIME_INODE_CDEV;
01463         return 1;
01464     case S_IFBLK:
01465         conf->resultBuf = MIME_INODE_BDEV;
01466         return 1;
01467         /* TODO add code to handle V7 MUX and Blit MUX files */
01468 #ifdef    S_IFIFO
01469     case S_IFIFO:
01470         conf->resultBuf = MIME_INODE_FIFO;
01471         return 1;
01472 #endif
01473 #ifdef    S_IFLNK
01474     case S_IFLNK:
01475     {
01476         char buf[BUFSIZ + BUFSIZ + 4];
01477         register int nch;
01478         KDE_struct_stat tstatbuf;
01479 
01480         if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01481             conf->resultBuf = MIME_INODE_LINK;
01482             //conf->resultBuf += "\nunreadable";
01483             return 1;
01484         }
01485         buf[nch] = '\0'; /* readlink(2) forgets this */
01486         /* If broken symlink, say so and quit early. */
01487         if (*buf == '/') {
01488             if (KDE_stat(buf, &tstatbuf) < 0) {
01489                 conf->resultBuf = MIME_INODE_LINK;
01490                 //conf->resultBuf += "\nbroken";
01491                 return 1;
01492             }
01493         } else {
01494             char *tmp;
01495             char buf2[BUFSIZ + BUFSIZ + 4];
01496 
01497             strncpy(buf2, fn, BUFSIZ);
01498             buf2[BUFSIZ] = 0;
01499 
01500             if ((tmp = strrchr(buf2, '/')) == NULL) {
01501                 tmp = buf; /* in current dir */
01502             } else {
01503                 /* dir part plus (rel.) link */
01504                 *++tmp = '\0';
01505                 strcat(buf2, buf);
01506                 tmp = buf2;
01507             }
01508             if (KDE_stat(tmp, &tstatbuf) < 0) {
01509                 conf->resultBuf = MIME_INODE_LINK;
01510                 //conf->resultBuf += "\nbroken";
01511                 return 1;
01512             } else
01513                 strcpy(buf, tmp);
01514         }
01515         if (conf->followLinks)
01516             process( conf, QFile::decodeName( buf ) );
01517         else
01518             conf->resultBuf = MIME_INODE_LINK;
01519         return 1;
01520     }
01521     return 1;
01522 #endif
01523 #ifdef    S_IFSOCK
01524 #ifndef __COHERENT__
01525     case S_IFSOCK:
01526         conf->resultBuf = MIME_INODE_SOCK;
01527         return 1;
01528 #endif
01529 #endif
01530     case S_IFREG:
01531         break;
01532     default:
01533         kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01534         /* NOTREACHED */
01535     }
01536 
01537     /*
01538      * regular file, check next possibility
01539      */
01540     if (sb->st_size == 0) {
01541         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01542         return 1;
01543     }
01544     return 0;
01545 }
01546 
01547 /*
01548  * Go through the whole list, stopping if you find a match.  Process all the
01549  * continuations of that match before returning.
01550  *
01551  * We support multi-level continuations:
01552  *
01553  * At any time when processing a successful top-level match, there is a current
01554  * continuation level; it represents the level of the last successfully
01555  * matched continuation.
01556  *
01557  * Continuations above that level are skipped as, if we see one, it means that
01558  * the continuation that controls them - i.e, the lower-level continuation
01559  * preceding them - failed to match.
01560  *
01561  * Continuations below that level are processed as, if we see one, it means
01562  * we've finished processing or skipping higher-level continuations under the
01563  * control of a successful or unsuccessful lower-level continuation, and are
01564  * now seeing the next lower-level continuation and should process it.  The
01565  * current continuation level reverts to the level of the one we're seeing.
01566  *
01567  * Continuations at the current level are processed as, if we see one, there's
01568  * no lower-level continuation that may have failed.
01569  *
01570  * If a continuation matches, we bump the current continuation level so that
01571  * higher-level continuations are processed.
01572  */
01573 static int
01574 match(struct config_rec* conf, unsigned char *s, int nbytes)
01575 {
01576     int cont_level = 0;
01577     union VALUETYPE p;
01578     struct magic *m;
01579 
01580 #ifdef DEBUG_MIMEMAGIC
01581     kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01582     for (m = conf->magic; m; m = m->next) {
01583         if (isprint((((unsigned long) m) >> 24) & 255) &&
01584             isprint((((unsigned long) m) >> 16) & 255) &&
01585             isprint((((unsigned long) m) >> 8) & 255) &&
01586             isprint(((unsigned long) m) & 255)) {
01587             kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01588             break;
01589         }
01590     }
01591 #endif
01592 
01593     for (m = conf->magic; m; m = m->next) {
01594 #ifdef DEBUG_MIMEMAGIC
01595         kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01596 #endif
01597         memset(&p, 0, sizeof(union VALUETYPE));
01598 
01599         /* check if main entry matches */
01600         if (!mget(&p, s, m, nbytes) ||
01601             !mcheck(&p, m)) {
01602             struct magic *m_cont;
01603 
01604             /*
01605              * main entry didn't match, flush its continuations
01606              */
01607             if (!m->next || (m->next->cont_level == 0)) {
01608                 continue;
01609             }
01610             m_cont = m->next;
01611             while (m_cont && (m_cont->cont_level != 0)) {
01612 #ifdef DEBUG_MIMEMAGIC
01613                 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01614 #endif
01615                 /*
01616                  * this trick allows us to keep *m in sync
01617                  * when the continue advances the pointer
01618                  */
01619                 m = m_cont;
01620                 m_cont = m_cont->next;
01621             }
01622             continue;
01623         }
01624         /* if we get here, the main entry rule was a match */
01625         /* this will be the last run through the loop */
01626 #ifdef DEBUG_MIMEMAGIC
01627         kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01628 #endif
01629 
01630         /* remember the match */
01631         conf->resultBuf = m->desc;
01632 
01633         cont_level++;
01634         /*
01635          * while (m && m->next && m->next->cont_level != 0 && ( m =
01636          * m->next ))
01637          */
01638         m = m->next;
01639         while (m && (m->cont_level != 0)) {
01640 #ifdef DEBUG_MIMEMAGIC
01641                     kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01642 #endif
01643                     if (cont_level >= m->cont_level) {
01644                 if (cont_level > m->cont_level) {
01645                     /*
01646                      * We're at the end of the level
01647                      * "cont_level" continuations.
01648                      */
01649                     cont_level = m->cont_level;
01650                 }
01651                 if (mget(&p, s, m, nbytes) &&
01652                     mcheck(&p, m)) {
01653                     /*
01654                      * This continuation matched. Print
01655                      * its message, with a blank before
01656                      * it if the previous item printed
01657                      * and this item isn't empty.
01658                      */
01659 #ifdef DEBUG_MIMEMAGIC
01660                                     kdDebug(7018) << "continuation matched" << endl;
01661 #endif
01662                                     conf->resultBuf = m->desc;
01663                     cont_level++;
01664                 }
01665             }
01666             /* move to next continuation record */
01667             m = m->next;
01668         }
01669                 // KDE-specific: need an actual mimetype for a real match
01670                 // If we only matched a rule with continuations but no mimetype, it's not a match
01671                 if ( !conf->resultBuf.isEmpty() )
01672                 {
01673 #ifdef DEBUG_MIMEMAGIC
01674                     kdDebug(7018) << "match: matched" << endl;
01675 #endif
01676                     return 1;       /* all through */
01677                 }
01678     }
01679 #ifdef DEBUG_MIMEMAGIC
01680     kdDebug(7018) << "match: failed" << endl;
01681 #endif
01682     return 0;               /* no match at all */
01683 }
01684 
// Try to parse prefixed tags before matching on content.
// So far only ID3v2 tags (version <= 2.4) are handled.
//
// buf/nbytes: the first bytes of the file.
// Returns the total size in bytes of the tag at the start of the buffer
// (header, body and optional footer), i.e. the offset at which the real
// content starts, or 0 when no valid tag is recognised.
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Sanity (known version, no unknown flags)
    if (buf[3] > 4)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    // The size is stored "syncsafe": four bytes of 7 bits each, high bit
    // always clear.  Anything else is not a real ID3v2 header, and using
    // it would yield a bogus seek offset.
    for (int i = 6; i <= 9; ++i) {
        if (buf[i] & 0x80)
            return 0;
    }

    int size = 10;              // the header itself
    // Tag has v4 footer
    if (buf[5] & 0x10)
        size += 10;
    // Calculated syncsafe size
    size += buf[9];
    size += buf[8] << 7;
    size += buf[7] << 14;
    size += buf[6] << 21;
    return size;
}
01706 
// A token is a view into the tokenizer's buffer: it is NOT NUL terminated.
struct Token {
    char *data;     // first character of the token
    int length;     // number of characters; 0 means "no more tokens"
};

// Splits a buffer into tokens separated by '\n', '\0', '\t', ' ', '\r',
// '\f', ',', ';' and '>'.  The buffer is only read, never modified.
struct Tokenizer
{
    Tokenizer(char* buf, int nbytes)
        : data(buf), length(nbytes), pos(0), newline(false)
    {
        token.data = buf;
        token.length = 0;
    }

    // True when the token most recently returned by nextToken() is the
    // first token on its line (or the first token of the buffer).
    bool isNewLine() {
        return newline;
    }

    // Returns the next token; the returned object is owned by the
    // tokenizer and overwritten by the following call.  A token of
    // length 0 signals the end of the buffer.
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        for (; pos < length; ++pos) {
            switch (data[pos]) {
                case '\n':
                    // Only a line break in FRONT of the token puts it on
                    // a new line; one that ends the token belongs to the
                    // next call, which starts on this same character.
                    if (token.length == 0)
                        newline = true;
                    /* fall through */
                case '\0':
                case '\t':
                case ' ':
                case '\r':
                case '\f':
                case ',':
                case ';':
                case '>':
                    if (token.length != 0)
                        return &token;  // pos stays on the delimiter
                    token.data++;       // skip leading delimiter
                    break;
                default:
                    token.length++;
            }
        }
        return &token;
    }

private:
    Token token;
    char* data;
    int length;
    int pos;
    bool newline;
};
01760 
01761 
01762 /* an optimization over plain strcmp() */
01763 //#define    STREQ(a, b)    (*(a) == *(b) && strcmp((a), (b)) == 0)
01764 static inline bool STREQ(const Token *token, const char *b) {
01765     const char *a = token->data;
01766     int len = token->length;
01767     if (a == b) return true;
01768     while(*a && *b && len > 0) {
01769         if (*a != *b) return false;
01770         a++; b++; len--;
01771     }
01772     return (len == 0 && *b == 0);
01773 }
01774 
01775 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01776 {
01777     int i;
01778     double pct, maxpct, pctsum;
01779     double pcts[NTYPES];
01780     int mostaccurate, tokencount;
01781     int typeset, jonly, conly, jconly, objconly, cpponly;
01782     int has_escapes = 0;
01783     //unsigned char *s;
01784     //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */
01785 
01786     /* these are easy, do them first */
01787     conf->accuracy = 70;
01788 
01789     /*
01790      * for troff, look for . + letter + letter or .\"; this must be done
01791      * to disambiguate tar archives' ./file and other trash from real
01792      * troff input.
01793      */
01794     if (*buf == '.') {
01795         unsigned char *tp = buf + 1;
01796 
01797         while (isascii(*tp) && isspace(*tp))
01798             ++tp;   /* skip leading whitespace */
01799         if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01800              isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01801             conf->resultBuf = MIME_APPL_TROFF;
01802             return 1;
01803         }
01804     }
01805     if ((*buf == 'c' || *buf == 'C') &&
01806         isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01807         /* Fortran */
01808         conf->resultBuf = MIME_TEXT_FORTRAN;
01809         return 1;
01810     }
01811     assert(nbytes-1 < HOWMANY + 1);
01812     /* look for tokens - this is expensive! */
01813     has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01814         Tokenizer tokenizer((char*)buf, nbytes);
01815         const Token* token;
01816         bool linecomment = false, blockcomment = false;
01817     const struct names *p;
01818     int typecount[NTYPES];
01819 /*
01820  * Fritz:
01821  * Try a little harder on C/C++/Java.
01822  */
01823     memset(&typecount, 0, sizeof(typecount));
01824     typeset = 0;
01825     jonly = 0;
01826     conly = 0;
01827     jconly = 0;
01828     objconly = 0;
01829     cpponly = 0;
01830     tokencount = 0;
01831         bool foundClass = false; // mandatory for java
01832     // first collect all possible types and count matches
01833         // we stop at '>' too, because of "<title>blah</title>" on HTML pages
01834     while ((token = tokenizer.nextToken())->length > 0) {
01835 #ifdef DEBUG_MIMEMAGIC
01836             kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01837 #endif
01838             if (linecomment && tokenizer.isNewLine())
01839                 linecomment = false;
01840             if (blockcomment && STREQ(token, "*/")) {
01841                 blockcomment = false;
01842                 continue;
01843             }
01844             for (p = names; p->name ; p++) {
01845                 if (STREQ(token, p->name)) {
01846 #ifdef DEBUG_MIMEMAGIC
01847                     kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01848 #endif
01849                     tokencount++;
01850                     typeset |= p->type;
01851                     if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01852                         if (linecomment || blockcomment) {
01853                             continue;
01854                         }
01855                         else {
01856                             switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01857                             {
01858                 case L_JAVA:
01859                     jonly++;
01860                     break;
01861                 case L_OBJC:
01862                     objconly++;
01863                     break;
01864                 case L_CPP:
01865                     cpponly++;
01866                     break;
01867                 case (L_CPP|L_JAVA):
01868                     jconly++;
01869                                         if ( !foundClass && STREQ(token, "class") )
01870                                             foundClass = true;
01871                     break;
01872                 case (L_C|L_CPP):
01873                     conly++;
01874                     break;
01875                 default:
01876                                     if (STREQ(token, "//")) linecomment = true;
01877                                     if (STREQ(token, "/*")) blockcomment = true;
01878                             }
01879             }
01880                     }
01881                     for (i = 0; i < (int)NTYPES; i++) {
01882                         if ((1 << i) & p->type) typecount[i]++;
01883                     }
01884         }
01885             }
01886     }
01887 
01888     if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01889         conf->accuracy = 60;
01890             if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01891 #ifdef DEBUG_MIMEMAGIC
01892                         kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01893 #endif
01894             if (jonly > 1 && foundClass) {
01895                 // At least two java-only tokens have matched, including "class"
01896                 conf->resultBuf = QString(types[P_JAVA].type);
01897                 return 1;
01898             }
01899             if (jconly > 1) {
01900                 // At least two non-C (only C++ or Java) token have matched.
01901                 if (typecount[P_JAVA] < typecount[P_CPP])
01902                   conf->resultBuf = QString(types[P_CPP].type);
01903                 else
01904                   conf->resultBuf = QString(types[P_JAVA].type);
01905                 return 1;
01906             }
01907                         if (conly + cpponly > 1) {
01908                  // Either C or C++.
01909                       if (cpponly > 0)
01910                                 conf->resultBuf = QString(types[P_CPP].type);
01911                               else
01912                                 conf->resultBuf = QString(types[P_C].type);
01913                               return 1;
01914                         }
01915             if (objconly > 0) {
01916                 conf->resultBuf =  QString(types[P_OBJC].type);
01917                 return 1;
01918             }
01919           }
01920     }
01921 
01922     /* Neither C, C++ or Java (or all of them without able to distinguish):
01923      * Simply take the token-class with the highest
01924      * matchcount > 0
01925      */
01926     mostaccurate = -1;
01927     maxpct = pctsum = 0.0;
01928     for (i = 0; i < (int)NTYPES; i++) {
01929       if (typecount[i] > 1) { // one word is not enough, we need at least two
01930         pct = (double)typecount[i] / (double)types[i].kwords *
01931             (double)types[i].weight;
01932         pcts[i] = pct;
01933         pctsum += pct;
01934         if (pct > maxpct) {
01935             maxpct = pct;
01936             mostaccurate = i;
01937           }
01938 #ifdef DEBUG_MIMEMAGIC
01939           kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01940 #endif
01941       }
01942     }
01943     if (mostaccurate >= 0) {
01944             if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java
01945             {
01946         conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01947 #ifdef DEBUG_MIMEMAGIC
01948                 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01949 #endif
01950         conf->resultBuf = QString(types[mostaccurate].type);
01951         return 1;
01952             }
01953     }
01954 
01955     switch (is_tar(buf, nbytes)) {
01956         case 1:
01957             /* V7 tar archive */
01958             conf->resultBuf = MIME_APPL_TAR;
01959             conf->accuracy = 90;
01960             return 1;
01961         case 2:
01962             /* POSIX tar archive */
01963             conf->resultBuf = MIME_APPL_TAR;
01964             conf->accuracy = 90;
01965             return 1;
01966     }
01967 
01968     for (i = 0; i < nbytes; i++) {
01969         if (!isascii(*(buf + i)))
01970             return 0;   /* not all ascii */
01971     }
01972 
01973     /* all else fails, but it is ascii... */
01974     conf->accuracy = 90;
01975     if (has_escapes) {
01976         /* text with escape sequences */
01977         /* we leave this open for further differentiation later */
01978         conf->resultBuf = MIME_TEXT_UNKNOWN;
01979     } else {
01980         /* plain text */
01981         conf->resultBuf = MIME_TEXT_PLAIN;
01982     }
01983     return 1;
01984 }
01985 
01986 /* Maximal length of a line we consider "reasonable". */
01987 #define TEXT_MAXLINELEN 300
01988 
01989 // This code is taken from the "file" command, where it is licensed
01990 // in the "beer-ware license" :-)
01991 // Original author: <joerg@FreeBSD.ORG>
01992 // Simplified by David Faure to avoid the static array char[256].
01993 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01994 {
01995     int i;
01996     unsigned char *cp;
01997 
01998     nbytes--;
01999 
02000     /* First, look whether there are "unreasonable" characters. */
02001     for (i = 0, cp = buf; i < nbytes; i++, cp++)
02002         if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02003             return 0;
02004 
02005     /* Now, look whether the file consists of lines of
02006      * "reasonable" length. */
02007 
02008     for (i = 0; i < nbytes;) {
02009         cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02010         if (cp == NULL) {
02011             /* Don't fail if we hit the end of buffer. */
02012             if (i + TEXT_MAXLINELEN >= nbytes)
02013                 break;
02014             else
02015                 return 0;
02016         }
02017         if (cp - buf > TEXT_MAXLINELEN)
02018             return 0;
02019         i += (cp - buf + 1);
02020         buf = cp + 1;
02021     }
02022     conf->resultBuf = MIME_TEXT_PLAIN;
02023     return 1;
02024 }
02025 
02026 
02027 /*
02028  * is_tar() -- figure out whether file is a tar archive.
02029  *
02030  * Stolen (by author of file utility) from the public domain tar program: Public
02031  * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
02032  *
02033  * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
02034  * 1997/06/24 00:41:02 ikluft Exp ikluft $
02035  *
02036  * Comments changed and some code/comments reformatted for file command by Ian
02037  * Darwin.
02038  */
02039 
02040 #define    isodigit(c)    ( ((c) >= '0') && ((c) <= '7') )
02041 
02042 /*
02043  * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
02044  * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
02045  */
02046 
02047 static int
02048 is_tar(unsigned char *buf, int nbytes)
02049 {
02050     register union record *header = (union record *) buf;
02051     register int i;
02052     register long sum,
02053      recsum;
02054     register char *p;
02055 
02056     if (nbytes < (int)sizeof(union record))
02057          return 0;
02058 
02059     recsum = from_oct(8, header->header.chksum);
02060 
02061     sum = 0;
02062     p = header->charptr;
02063     for (i = sizeof(union record); --i >= 0;) {
02064         /*
02065          * We can't use unsigned char here because of old compilers,
02066          * e.g. V7.
02067          */
02068         sum += 0xFF & *p++;
02069     }
02070 
02071     /* Adjust checksum to count the "chksum" field as blanks. */
02072     for (i = sizeof(header->header.chksum); --i >= 0;)
02073         sum -= 0xFF & header->header.chksum[i];
02074     sum += ' ' * sizeof header->header.chksum;
02075 
02076     if (sum != recsum)
02077         return 0;       /* Not a tar archive */
02078 
02079     if (0 == strcmp(header->header.magic, TMAGIC))
02080         return 2;       /* Unix Standard tar archive */
02081 
02082     return 1;               /* Old fashioned tar archive */
02083 }
02084 
02085 
/*
 * Quick and dirty octal conversion.
 *
 * digs  - width of the field, i.e. the maximum number of characters read
 * where - start of the field (need not be NUL-terminated within digs chars)
 *
 * Leading whitespace is skipped, then octal digits are accumulated until the
 * field is exhausted or a non-octal character is met.
 *
 * Result is -1 if the field is invalid: all digs characters are blank, or
 * the digits are followed by something other than whitespace or NUL.
 * A field that reaches NUL or whitespace before any digit yields 0.
 */
static long
from_oct(int digs, char *where)
{
    long value;

    /*
     * The field comes from arbitrary file contents, so bytes may be >= 0x80.
     * Passing a negative plain char to isspace() is undefined behaviour,
     * hence the conversion to unsigned char.
     */
    while (isspace(static_cast<unsigned char>(*where))) {   /* Skip spaces */
        where++;
        if (--digs <= 0)
            return -1;  /* All blank field */
    }

    value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') {  /* Scan til nonoctal */
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace(static_cast<unsigned char>(*where)))
        return -1;      /* Ended on non-space/nul */

    return value;
}
02112 
02113 KMimeMagic::KMimeMagic()
02114 {
02115     // Magic file detection init
02116     QString mimefile = locate( "mime", "magic" );
02117     init( mimefile );
02118     // Add snippets from share/config/magic/*
02119     QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02120     for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02121         if ( !mergeConfig( *it ) )
02122             kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02123 }
02124 
02125 KMimeMagic::KMimeMagic(const QString & _configfile)
02126 {
02127     init( _configfile );
02128 }
02129 
02130 void KMimeMagic::init( const QString& _configfile )
02131 {
02132     int result;
02133     conf = new config_rec;
02134 
02135     /* set up the magic list (empty) */
02136     conf->magic = conf->last = NULL;
02137     magicResult = NULL;
02138     conf->followLinks = false;
02139 
02140         conf->utimeConf = 0L; // created on demand
02141     /* on the first time through we read the magic file */
02142     result = apprentice(_configfile);
02143     if (result == -1)
02144         return;
02145 #ifdef MIME_MAGIC_DEBUG_TABLE
02146     test_table();
02147 #endif
02148 }
02149 
02150 /*
02151  * The destructor.
02152  * Free the magic-table and other resources.
02153  */
02154 KMimeMagic::~KMimeMagic()
02155 {
02156     if (conf) {
02157         struct magic *p = conf->magic;
02158         struct magic *q;
02159         while (p) {
02160             q = p;
02161             p = p->next;
02162             free(q);
02163         }
02164                 delete conf->utimeConf;
02165         delete conf;
02166     }
02167         delete magicResult;
02168 }
02169 
02170 bool
02171 KMimeMagic::mergeConfig(const QString & _configfile)
02172 {
02173     kdDebug(7018) << k_funcinfo << _configfile << endl;
02174     int result;
02175 
02176     if (_configfile.isEmpty())
02177         return false;
02178     result = apprentice(_configfile);
02179     if (result == -1) {
02180         return false;
02181     }
02182 #ifdef MIME_MAGIC_DEBUG_TABLE
02183     test_table();
02184 #endif
02185     return true;
02186 }
02187 
02188 bool
02189 KMimeMagic::mergeBufConfig(char * _configbuf)
02190 {
02191     int result;
02192 
02193     if (conf) {
02194         result = buff_apprentice(_configbuf);
02195         if (result == -1)
02196             return false;
02197 #ifdef MIME_MAGIC_DEBUG_TABLE
02198         test_table();
02199 #endif
02200         return true;
02201     }
02202     return false;
02203 }
02204 
02205 void
02206 KMimeMagic::setFollowLinks( bool _enable )
02207 {
02208     conf->followLinks = _enable;
02209 }
02210 
02211 KMimeMagicResult *
02212 KMimeMagic::findBufferType(const QByteArray &array)
02213 {
02214     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
02215 
02216     conf->resultBuf = QString::null;
02217     if ( !magicResult )
02218       magicResult = new KMimeMagicResult();
02219     magicResult->setInvalid();
02220     conf->accuracy = 100;
02221 
02222     int nbytes = array.size();
02223 
02224         if (nbytes > HOWMANY)
02225                 nbytes = HOWMANY;
02226         memcpy(buf, array.data(), nbytes);
02227         if (nbytes == 0) {
02228                 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02229         } else {
02230                 buf[nbytes++] = '\0';   /* null-terminate it */
02231                 tryit(conf, buf, nbytes);
02232         }
02233         /* if we have any results, put them in the request structure */
02234     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02235     magicResult->setAccuracy(conf->accuracy);
02236         return magicResult;
02237 }
02238 
02239 static void
02240 refineResult(KMimeMagicResult *r, const QString & _filename)
02241 {
02242     QString tmp = r->mimeType();
02243     if (tmp.isEmpty())
02244         return;
02245     if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02246     {
02247         if ( _filename.right(2) == ".h" )
02248             tmp += "hdr";
02249         else
02250             tmp += "src";
02251         r->setMimeType(tmp);
02252     }
02253     else
02254     if ( tmp == "text/x-c++" )
02255     {
02256         if ( _filename.endsWith(".h")
02257           || _filename.endsWith(".hh")
02258           || _filename.endsWith(".H")
02259           || !_filename.right(4).contains('.'))
02260             tmp += "hdr";
02261         else
02262             tmp += "src";
02263         r->setMimeType(tmp);
02264     }
02265 }
02266 
02267 KMimeMagicResult *
02268 KMimeMagic::findBufferFileType( const QByteArray &data,
02269                 const QString &fn)
02270 {
02271         KMimeMagicResult * r = findBufferType( data );
02272     refineResult(r, fn);
02273         return r;
02274 }
02275 
02276 /*
02277  * Find the content-type of the given file.
02278  */
02279 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02280 {
02281 #ifdef DEBUG_MIMEMAGIC
02282     kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02283 #endif
02284     conf->resultBuf = QString::null;
02285 
02286         if ( !magicResult )
02287       magicResult = new KMimeMagicResult();
02288     magicResult->setInvalid();
02289     conf->accuracy = 100;
02290 
02291         if ( !conf->utimeConf )
02292             conf->utimeConf = new KMimeMagicUtimeConf();
02293 
02294         /* process it based on the file contents */
02295         process(conf, fn );
02296 
02297         /* if we have any results, put them in the request structure */
02298         //finishResult();
02299     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02300     magicResult->setAccuracy(conf->accuracy);
02301     refineResult(magicResult, fn);
02302         return magicResult;
02303 }
KDE Logo
This file is part of the documentation for kio Library Version 3.4.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Sat May 7 22:07:50 2005 by doxygen 1.3.9.1 written by Dimitri van Heesch, © 1997-2003