Server IP : 127.0.0.2 / Your IP : 18.216.67.249 Web Server : Apache/2.4.18 (Ubuntu) System : User : www-data ( ) PHP Version : 7.0.33-0ubuntu0.16.04.16 Disable Function : disk_free_space,disk_total_space,diskfreespace,dl,exec,fpaththru,getmyuid,getmypid,highlight_file,ignore_user_abord,leak,listen,link,opcache_get_configuration,opcache_get_status,passthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,php_uname,phpinfo,posix_ctermid,posix_getcwd,posix_getegid,posix_geteuid,posix_getgid,posix_getgrgid,posix_getgrnam,posix_getgroups,posix_getlogin,posix_getpgid,posix_getpgrp,posix_getpid,posix,_getppid,posix_getpwnam,posix_getpwuid,posix_getrlimit,posix_getsid,posix_getuid,posix_isatty,posix_kill,posix_mkfifo,posix_setegid,posix_seteuid,posix_setgid,posix_setpgid,posix_setsid,posix_setuid,posix_times,posix_ttyname,posix_uname,pclose,popen,proc_open,proc_close,proc_get_status,proc_nice,proc_terminate,shell_exec,source,show_source,system,virtual MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /opt/odoo/addons/document/models/ |
Upload File : |
# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.

import logging
import re
import xml.dom.minidom
import zipfile
from StringIO import StringIO

import pyPdf

from odoo import api, models

_logger = logging.getLogger(__name__)

# Suffixes of the ``_index_<ftype>`` methods tried, in this order, by
# ``IrAttachment._index``.
FTYPES = ['docx', 'pptx', 'xlsx', 'opendoc', 'pdf']

# Matches the slide parts of a .pptx archive and captures the slide number,
# so that slides can be read in numeric order.
_PPTX_SLIDE_RE = re.compile(r'^ppt/slides/slide(\d+)\.xml$')


# Keep function in case it is necessary to do
# toUnicode(buf.encode('ascii', 'replace'))
def toUnicode(s):
    '''Decode ``s`` as UTF-8, falling back to latin and then to ASCII.

    :param s: string to convert
    :return: the first conversion that does not raise ``UnicodeError``;
             ``s`` itself when every attempt fails
    '''
    try:
        return s.decode('utf-8')
    except UnicodeError:
        pass
    try:
        return s.decode('latin')
    except UnicodeError:
        pass
    try:
        return s.encode('ascii')
    except UnicodeError:
        return s


def textToString(element):
    '''Return the concatenated text found under a DOM node.

    Text nodes contribute their value, element nodes are visited
    recursively, and any other node type is ignored.

    :param element: DOM node whose ``childNodes`` are walked
    :return: unicode string, empty when no text is found
    '''
    parts = []
    for node in element.childNodes:
        if node.nodeType == xml.dom.Node.TEXT_NODE:
            parts.append(node.nodeValue)
        elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
            parts.append(textToString(node))
    return u"".join(parts)


def _pptx_slide_members(archive):
    '''Return the slide member names of a .pptx archive in numeric order.'''
    slides = []
    for name in archive.namelist():
        match = _PPTX_SLIDE_RE.match(name)
        if match:
            slides.append((int(match.group(1)), name))
    return [name for _number, name in sorted(slides)]


def _zip_xml_text(bin_data, members, tag_names):
    '''Extract the text of the given tags from XML members of a zip archive.

    :param bin_data: raw content of the attachment
    :param members: list of member names to read, or a callable receiving
                    the opened ``ZipFile`` and returning such a list
    :param tag_names: tag names whose text is collected, one line per
                      matching element
    :return: unicode string; empty when ``bin_data`` is not a zip archive.
             When an error occurs part-way, the text collected so far is
             returned.
    '''
    stream = StringIO(bin_data)
    if not zipfile.is_zipfile(stream):
        return u""

    lines = []
    try:
        with zipfile.ZipFile(stream) as archive:
            if callable(members):
                members = members(archive)
            for member in members:
                dom = xml.dom.minidom.parseString(archive.read(member))
                for tag in tag_names:
                    for element in dom.getElementsByTagName(tag):
                        lines.append(textToString(element) + u"\n")
    except Exception:
        # Indexing is best effort: every indexer is tried on every
        # attachment, so a missing member or malformed XML is expected.
        # Log at debug level only, to keep a trace without flooding the log.
        _logger.debug("Could not index zipped XML content (tags %s)",
                      tag_names, exc_info=True)
    return u"".join(lines)


class IrAttachment(models.Model):
    _inherit = 'ir.attachment'

    def _index_docx(self, bin_data):
        '''Index Microsoft .docx documents

        :param bin_data: raw content of the attachment
        :return: extracted text, empty when nothing could be extracted
        '''
        return _zip_xml_text(
            bin_data, ["word/document.xml"], ["w:p", "w:h", "text:list"])

    def _index_pptx(self, bin_data):
        '''Index Microsoft .pptx documents

        Slides are read in numeric order from the members actually present
        in the archive, so a gap in the slide numbering does not prevent the
        remaining slides from being indexed.

        :param bin_data: raw content of the attachment
        :return: extracted text, empty when nothing could be extracted
        '''
        return _zip_xml_text(bin_data, _pptx_slide_members, ["a:t"])

    def _index_xlsx(self, bin_data):
        '''Index Microsoft .xlsx documents

        :param bin_data: raw content of the attachment
        :return: extracted text, empty when nothing could be extracted
        '''
        return _zip_xml_text(bin_data, ["xl/sharedStrings.xml"], ["t"])

    def _index_opendoc(self, bin_data):
        '''Index OpenDocument documents (.odt, .ods...)

        :param bin_data: raw content of the attachment
        :return: extracted text, empty when nothing could be extracted
        '''
        return _zip_xml_text(
            bin_data, ["content.xml"], ["text:p", "text:h", "text:list"])

    def _index_pdf(self, bin_data):
        '''Index PDF documents

        :param bin_data: raw content of the attachment
        :return: extracted text, empty when ``bin_data`` does not start
                 with the PDF signature or nothing could be extracted
        '''
        if not bin_data.startswith('%PDF-'):
            return u""

        pages = []
        try:
            pdf = pyPdf.PdfFileReader(StringIO(bin_data))
            for page in pdf.pages:
                pages.append(page.extractText())
        except Exception:
            # Best effort, as for the zipped formats: keep the pages
            # extracted so far and leave a debug trace of the failure.
            _logger.debug("Could not index PDF content", exc_info=True)
        return u"".join(pages)

    @api.model
    def _index(self, bin_data, datas_fname, mimetype):
        '''Return the indexable text of an attachment.

        Each indexer listed in ``FTYPES`` is tried in turn and the first
        non-empty result is returned; when none yields text, the parent
        implementation is used.

        :param bin_data: raw content of the attachment
        :param datas_fname: passed through to the parent implementation
        :param mimetype: passed through to the parent implementation
        '''
        for ftype in FTYPES:
            buf = getattr(self, '_index_%s' % ftype)(bin_data)
            if buf:
                return buf
        return super(IrAttachment, self)._index(bin_data, datas_fname, mimetype)