Server IP : 127.0.0.2 / Your IP : 18.190.152.109 Web Server : Apache/2.4.18 (Ubuntu) System : User : www-data ( ) PHP Version : 7.0.33-0ubuntu0.16.04.16 Disable Function : disk_free_space,disk_total_space,diskfreespace,dl,exec,fpaththru,getmyuid,getmypid,highlight_file,ignore_user_abord,leak,listen,link,opcache_get_configuration,opcache_get_status,passthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,php_uname,phpinfo,posix_ctermid,posix_getcwd,posix_getegid,posix_geteuid,posix_getgid,posix_getgrgid,posix_getgrnam,posix_getgroups,posix_getlogin,posix_getpgid,posix_getpgrp,posix_getpid,posix,_getppid,posix_getpwnam,posix_getpwuid,posix_getrlimit,posix_getsid,posix_getuid,posix_isatty,posix_kill,posix_mkfifo,posix_setegid,posix_seteuid,posix_setgid,posix_setpgid,posix_setsid,posix_setuid,posix_times,posix_ttyname,posix_uname,pclose,popen,proc_open,proc_close,proc_get_status,proc_nice,proc_terminate,shell_exec,source,show_source,system,virtual MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /opt/odoo/odoo/tools/ |
Upload File : |
# -*- coding: utf-8 -*-
"""
Mimetypes-related utilities

# TODO: reexport stdlib mimetypes?
"""
import collections
import io
import logging
import re
import zipfile

__all__ = ['guess_mimetype']

_logger = logging.getLogger(__name__)

# We define our own guess_mimetype implementation and if magic is available we
# use it instead.

# Discriminants for zip-based file formats: top-level directory of the package
# -> mimetype of the document.
_ooxml_dirs = {
    'word/': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    # PresentationML packages keep their parts under ``ppt/`` (not ``pt/``)
    'ppt/': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'xl/': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
}


def _check_ooxml(data):
    """ Discriminates OOXML documents (docx, pptx, xlsx) among zip files.

    :param bytes data: content of a zip archive
    :returns: the OOXML mimetype matching the archive's content, or ``False``
              if the archive does not look like an OOXML document
    :raises zipfile.BadZipfile: if ``data`` is not a readable zip archive
    """
    with io.BytesIO(data) as f, zipfile.ZipFile(f) as z:
        filenames = z.namelist()
        # OOXML documents should have a [Content_Types].xml file for early
        # check that we're interested in this thing at all
        if '[Content_Types].xml' not in filenames:
            return False

        # then there is a directory whose name denotes the type of the file:
        # word, ppt (powerpoint) or xl (excel)
        for dirname, mime in _ooxml_dirs.items():
            if any(entry.startswith(dirname) for entry in filenames):
                return mime

        return False


# checks that a string looks kinda sorta like a mimetype (only the beginning
# of the string is checked, trailing content is not rejected)
_mime_validator = re.compile(r"""
    [\w-]+ # type-name
    / # subtype separator
    [\w-]+ # registration facet or subtype
    (?:\.[\w-]+)* # optional faceted name
    (?:\+[\w-]+)? # optional structured syntax specifier
""", re.VERBOSE)


def _check_open_container_format(data):
    """ Discriminates Open Container Format documents (odt, ods, odp, epub,
    ...) among zip files.

    :param bytes data: content of a zip archive
    :returns: the content of the archive's ``mimetype`` entry as a native
              string if it looks like a mimetype, ``False`` otherwise
    :raises zipfile.BadZipfile: if ``data`` is not a readable zip archive
    """
    with io.BytesIO(data) as f, zipfile.ZipFile(f) as z:
        # OCF zip files must contain a ``mimetype`` entry
        if 'mimetype' not in z.namelist():
            return False

        # it holds the exact mimetype for the file
        raw = z.read('mimetype')
        # check that it's not too long (RFC6838 section 4.2 restricts type and
        # subtype to 127 characters each + separator, strongly recommends
        # limiting them to 64 but does not require it)
        if len(raw) >= 256:
            return False

        # the validator is a text pattern: on Python 3 the entry is read as
        # bytes and must be decoded, on Python 2 it already is a native str
        if isinstance(raw, str):
            marcel = raw
        else:
            try:
                marcel = raw.decode('ascii')
            except UnicodeDecodeError:
                return False

        # ... and that it looks a lot like a valid mime type
        if _mime_validator.match(marcel):
            return marcel

        return False


# Raw literals: the regex engine interprets the ``\xNN`` escapes itself,
# otherwise re.VERBOSE would silently drop the literal tab (\x09) and space
# (\x20) bytes as insignificant whitespace.
_xls_pattern = re.compile(br"""
    \x09\x08\x10\x00\x00\x06\x05\x00
  | \xFD\xFF\xFF\xFF(\x10|\x1F|\x20|"|\#|\(|\))
""", re.VERBOSE)
_ppt_pattern = re.compile(br"""
    \x00\x6E\x1E\xF0
  | \x0F\x00\xE8\x03
  | \xA0\x46\x1D\xF0
  | \xFD\xFF\xFF\xFF(\x0E|\x1C|\x43)\x00\x00\x00
""", re.VERBOSE)


def _check_olecf(data):
    """ Pre-OOXML Office formats are OLE Compound Files which all use the same
    file signature ("magic bytes") and should have a subheader at offset 512
    (0x200).

    Subheaders taken from http://www.garykessler.net/library/file_sigs.html
    according to which Mac office files *may* have different subheaders. We'll
    ignore that.

    :param bytes data: content of an OLE compound file
    :returns: the mimetype of the Word, Excel or PowerPoint document, or
              ``False`` if no known subheader was recognised
    """
    offset = 0x200
    if data.startswith(b'\xEC\xA5\xC1\x00', offset):
        return 'application/msword'
    # the _xls_pattern stuff doesn't seem to work correctly (the test file
    # only has a bunch of \xf* at offset 0x200), that apparently works
    elif b'Microsoft Excel' in data:
        return 'application/vnd.ms-excel'
    elif _ppt_pattern.match(data, offset):
        return 'application/vnd.ms-powerpoint'
    return False


# for "master" formats with many subformats, discriminants is a list of
# functions, tried in order and the first non-falsy value returned is the
# selected mime type. If all functions return falsy values, the master
# mimetype is returned.
_Entry = collections.namedtuple('_Entry', ['mimetype', 'signatures', 'discriminants'])
_mime_mappings = (
    # pdf
    _Entry('application/pdf', [b'%PDF'], []),
    # jpg, jpeg, png, gif, bmp
    _Entry('image/jpeg', [
        b'\xFF\xD8\xFF\xE0',
        b'\xFF\xD8\xFF\xE2',
        b'\xFF\xD8\xFF\xE3',
        b'\xFF\xD8\xFF\xE1',
    ], []),
    _Entry('image/png', [b'\x89PNG\r\n\x1A\n'], []),
    _Entry('image/gif', [b'GIF87a', b'GIF89a'], []),
    _Entry('image/bmp', [b'BM'], []),
    # OLECF files in general (Word, Excel, PPT, default to word because why not?)
    _Entry('application/msword', [
        b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1',
        b'\x0D\x44\x4F\x43',
    ], [
        _check_olecf,
    ]),
    # zip, but will include jar, odt, ods, odp, docx, xlsx, pptx, apk
    _Entry('application/zip', [b'PK\x03\x04'], [
        _check_ooxml,
        _check_open_container_format,
    ]),
)


def guess_mimetype(bin_data, default='application/octet-stream'):
    """ Attempts to guess the mime type of the provided binary data, similar
    to but significantly more limited than libmagic

    :param bytes bin_data: binary data to try and guess a mime type for
    :param str default: value returned when no signature matches
    :returns: matched mimetype or ``default``
              (``application/octet-stream``) if none matched
    """
    # by default, guess the type using the magic number of file hex signature
    # (like magic, but more limited)
    # see http://www.filesignatures.net/ for file signatures
    for entry in _mime_mappings:
        for signature in entry.signatures:
            if not bin_data.startswith(signature):
                continue

            for discriminant in entry.discriminants:
                try:
                    guess = discriminant(bin_data)
                    if guess:
                        return guess
                except Exception:
                    # log-and-next: a failing sub-checker must not prevent
                    # the following ones (or the fallback) from applying
                    _logger.getChild('guess_mimetype').warning(
                        "Sub-checker '%s' of type '%s' failed",
                        discriminant.__name__, entry.mimetype,
                        exc_info=True
                    )
            # if no discriminant or no discriminant matches, return
            # primary mime type
            return entry.mimetype
    return default


try:
    import magic
except ImportError:
    magic = None
else:
    # There are 2 python libs named 'magic' with incompatible api.
    # magic from pypi https://pypi.python.org/pypi/python-magic/
    if hasattr(magic, 'from_buffer'):
        def guess_mimetype(bin_data, default=None):
            """ Guesses the mime type of ``bin_data`` using python-magic;
            ``default`` is accepted for compatibility but not used.
            """
            return magic.from_buffer(bin_data, mime=True)
    # magic from file(1) https://packages.debian.org/squeeze/python-magic
    elif hasattr(magic, 'open'):
        ms = magic.open(magic.MAGIC_MIME_TYPE)
        ms.load()

        def guess_mimetype(bin_data, default=None):
            """ Guesses the mime type of ``bin_data`` using the file(1)
            bindings; ``default`` is accepted for compatibility but not used.
            """
            return ms.buffer(bin_data)