""" Export authored content from Plone to filesystem for static site migration. Tested against Plone 4.3. Attempts to be as agnostic about the CMF portal types as possible, except for assuming Archetypes. As such, duck-typing is used wherever possible. IOW, more Pythonic than Zopeish. """ import os import time import posixpath import mimetypes import tempfile import subprocess import logging import pprint from Products.Archetypes import Storage from Products.Archetypes import Marshall logger = logging.getLogger("export_content") mimetypes.init() MIME_TYPES_PRESERVE_EXT = {"application/octet-stream"} EXPORT_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), "content", ) def get_basename_root_ext(obj, basename, field=None): """ Determine then best filename with extension based on available information. """ # Prefer the field's filename if available if field is not None and hasattr(field, "getFilename"): filename = field.getFilename(obj) if filename: basename = filename root, ext = posixpath.splitext(basename) # If extension isn't recognized in either registry, assume the extension # is actually intended to be a part of the basename mime_type = None ext_registry_type = obj.mimetypes_registry.lookupExtension(ext[1:]) if ext_registry_type is not None: mime_type = str(ext_registry_type) if mime_type is None: mime_type = mimetypes.guess_type(basename)[0] if mime_type is None and mime_type not in MIME_TYPES_PRESERVE_EXT: root = basename ext = "" # If there is no recognized extension, use the field's MIME type, if # available, to lookup an appropriate extension if field is not None and not ext: mime_type = field.getContentType(obj) if mime_type not in MIME_TYPES_PRESERVE_EXT: registry_mime_types = obj.mimetypes_registry.lookup(mime_type) exts = [] for registry_mime_type in registry_mime_types: exts.extend( "." + registry_ext for registry_ext in registry_mime_type.extensions ) exts.extend(mimetypes.guess_all_extensions(mime_type)) if exts and ext not in exts: ext = exts[0] root = basename basename = root + ext logger.debug( "Determined extension for %r, %r: %r, %r, %r", obj, field, basename, root, ext, ) return basename, root, ext def export_apply(obj, path): """ Export all file-like Archetypes fields to files on the filesystem. """ # Filter which Zope objects we support exporting if not hasattr(obj, "SearchableText"): logger.debug("Skipping Zope object that isn't CMF content: %s", path) return elif not hasattr(obj, "getPrimaryField"): logger.warning("Skipping CMF content that isn't an Archetype: %s", path) return primary_field = obj.getPrimaryField() if primary_field is None: logger.warning("Skipping AT content with no primary field: %s", path) return elif not hasattr(obj, "__getitem__"): logger.warning("Skipping AT content without dictionay field access: %s", path) return # Identify the fields to write to files on the filesystem fields = [(primary_field.getName(), primary_field)] fields.extend( (field.getName(), field) for field in obj.Schema().fields() # Don't duplicate if field.getName() != primary_field.getName() # Include all fields appropriate for exporting to a file: # e.g. News Item Lead Image and hasattr(field, "getFilename") # Don't export file-like fields used for common metadata: # e.g. Description and not isinstance(field.getStorage(), Storage.MetadataStorage) ) # Write file-like fields to files on the filesystem parent, orig_basename = posixpath.split(path.lstrip(posixpath.sep)) orig_basename, orig_root, orig_ext = get_basename_root_ext(obj, orig_basename) export_parent_path = os.path.join(EXPORT_PATH, parent) if not os.path.isdir(export_parent_path): os.makedirs(export_parent_path) for field_name, field in fields: # Idenfify the filename to write to on the filesystem basename, root, ext = get_basename_root_ext(obj, orig_basename, field) export_path = os.path.join(export_parent_path, basename) # Get the raw/binary value to write to the filesystem raw_value = field.get(obj, raw=True) if hasattr(raw_value, "getRaw"): raw_value = raw_value.getRaw() if hasattr(raw_value, "data"): raw_value = str(raw_value.data) if not raw_value: logger.warning("Empty %r field for: %s", field_name, path) continue # Write the raw value to the filesystem if os.path.exists(export_path): # Probably actually is the same file, such as when an image has # been uploaded to a folder as an indiviudal content item *and* as # the lead image for a news item. So keep numbered, hidden # backups of all but keep only one at the "official" name. logger.warning( "Export file alreaady exists for field %r: %s", field_name, path, ) logger.info("Exporting %r field to: %s", field_name, export_path) with tempfile.NamedTemporaryFile( dir=export_parent_path, prefix=root, suffix=ext, delete=False, ) as tmp_file: tmp_file.write(raw_value) tmp_filename = tmp_file.name # Use the numbered backup logic of `$ mv` subprocess.check_call( ["mv", "--backup=numbered", "-v", tmp_filename, export_path], ) # Match the modification time os.utime(export_path, ( time.time(), obj.bobobase_modification_time().timeTime(), )) basename, root, ext = orig_basename, orig_root, orig_ext # Export all other supported fields, including Images and Files. # # Attempt to re-use the logic for mapping fields to FTP/WebDAV properties, # but do it for all content and write to a separate file. Done to capture # field values for CMF portal types whose FTP/WebDAV representation # wouldn't include other fields. marshalled = Marshall.RFC822Marshaller().marshall(obj)[-1] # Separate the headers from the body text properties = marshalled[:marshalled.find("\n\n") + 1] logger.info("Exporting other properties for: %s", path) properties_path = os.path.join(export_parent_path, root + ".properties") with open(properties_path, "wb") as properties_file: properties_file.write(properties) # Match the modification time os.utime(properties_path, ( time.time(), obj.bobobase_modification_time().timeTime(), )) # Symlink `index.*` when appropriate if obj.plone_utils.isDefaultPage(obj): # Link to the `*.html` filename under the assumption that a static # site generator will create it from the actual source file os.symlink(root + ".html", os.path.join(export_parent_path, "index.html")) def export(app): return app.ZopeFindAndApply(obj=app, search_sub=1, apply_func=export_apply) export.__doc__ = __doc__ if __name__ == "__main__" and "app" in globals(): root_logger = logging.getLogger() root_logger.setLevel(logging.INFO) root_logger.handlers[0].setLevel(logging.INFO) try: pprint.pprint(export(app)) except BaseException: import pdb; pdb.post_mortem() raise