#!/usr/bin/python
# -*- encoding: utf-8 -*-

##
## dbpickle.py
## Created on Thu Nov 30 00:26:52 2006
## by akaihola
##
## Copyright (C) 2006 Antti Kaihola
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##

"""
dbpickle.py is a Django tool for saving all database objects into a
file or loading and storing them back into the database.  It's useful
for migrating from one database engine to another.

This version handles ForeignKeys which refer to the related model's
primary key.  ManyToMany relations are now supported, but OneToOne
relations are not.

You can also use your own plugin for pre-processing objects before
saving them into the database.  This is useful e.g. when a model has
changed and you're importing objects from an old version of the
database.  The plugin should be a Python program with a
``pre_save_hook(obj)`` and/or ``pre_walk_hook(obj)`` function which
returns True if it modified the object, otherwise False.  Note that a
hook which returns None (e.g. by falling off the end without a
``return`` statement) causes the object to be skipped instead of saved.

Be careful with objects automatically inserted in your app e.g. in save()
methods.  Those could mess up ID numbering in databases which use sequence
tables.


Usage example
=============

Let's suppose your project is using PostgreSQL and the database has
been populated using the app.

$ PYTHONPATH=/home/myproject DJANGO_SETTINGS_MODULE=settings \
  ./dbpickle.py --dump --file=myproject.pickle

At this point you could change the settings so that SQLite is used
instead of PostgreSQL.

$ PYTHONPATH=/home/myproject DJANGO_SETTINGS_MODULE=settings \
  /home/myproject/manage.py syncdb

(answer 'no' when asked about creating a superuser)

$ PYTHONPATH=/home/myproject DJANGO_SETTINGS_MODULE=settings \
  ./dbpickle.py --load --file=myproject.pickle

This would effectively convert your database from PostgreSQL to SQLite
through Django.
"""

import cPickle
import logging
from imp import load_source
import os

# Make sure the name ``set`` is usable on every supported interpreter.
# Merely referencing the name raises NameError where the builtin does
# not exist, in which case the ``sets`` module class is bound to it.
try:
    set # Only available in Python 2.4+
except NameError:
    from sets import Set as set # Python 2.3 fallback

def dump(filepath):
    """
    Pickle all Django objects from the database and save them into the
    given file.
    """
    objects = {}   # model instances
    m2m_lists = [] # ManyToMany relations between model instances
    for model in models.get_models():
        meta = model._meta
        app, model_name = meta.app_label, meta.module_name
        logging.info('dumping %s.%s' % (app, model_name))

        # get all many-to-many relation field names for this model
        m2ms = [m2m.name for m2m in meta.many_to_many]

        try:
            for obj in model.objects.all():
                logging.debug('dumping %s.%s %r' % (app, model_name, obj))
                pk = obj._get_pk_val()
                objects[app, model_name, pk] = obj

                for m2m in m2ms:
                    # store many-to-many related objects for every
                    # many-to-many relation of this object
                    foreign_objs = getattr(obj, m2m).all()
                    logging.debug('dumping %s.%s.%s x %d' % (app, model_name, m2m, len(foreign_objs)))
                    m2m_lists.append((obj, m2m, tuple(foreign_objs)))
        except (backend.Database.OperationalError,
                backend.Database.ProgrammingError), e:
            logging.warning('table for %r not found (%s)' % (model, e))
            transaction.rollback_unless_managed()

    # pickle all objects and many-to-many relations on disk
    cPickle.dump((objects, m2m_lists), file(filepath, 'w'))


#@transaction.commit_on_success
def load(filepath, pluginpath=None):
    """
    Unpickle Django objects from the given file and save them into the
    database.
    """

    # load the plugin if specified on the command line
    if pluginpath:
        plugin = load_source('plugin', pluginpath)
    else:
        plugin = None

    # get the hook functions from the plugin
    hooks = {}
    for hookname in 'pre_save', 'pre_walk':
        hooks[hookname] = getattr(plugin, '%s_hook' % hookname, lambda obj: False)

    # unpickle objects and many-to-many relations from disk
    objects, m2m_lists = cPickle.load(file(filepath))

    # Find distinct models of all unpickled objects and delete all
    # objects before loading.  Note that models which have not been
    # dumped are not emptied.
    models = set( [obj.__class__ for obj in objects.itervalues()] )
    for model in models:
        try:
            for obj in model._default_manager.all():
                obj.delete()
        except (backend.Database.OperationalError,
                backend.Database.ProgrammingError), e:
            logging.warning('table for %r not found (%s)' % (model, e))
            transaction.rollback_unless_managed()

    # load all objects
    while objects:
        key, obj = objects.popitem()
        load_recursive(objects, obj, hooks)

    # load all many-to-many relations
    for obj1, m2m, foreign_objs in m2m_lists:
        meta1 = obj1._meta
        for obj2 in foreign_objs:
            meta2 = obj2._meta
            logging.debug('loading ManyToMany %s.%s.%s -> %s.%s.%s' % (
                meta1.app_label, meta1.module_name, obj1._get_pk_val(),
                meta2.app_label, meta2.module_name, obj2._get_pk_val()))
            getattr(obj1, m2m).add(obj2)

#load = transaction.commit_on_success(load)


def load_recursive(objects, obj, hooks):
    """
    Save the given object into the database.  If the object has
    ForeignKey relations to other objects, first make sure they are
    already saved (and repeat recursively).
    """

    meta = obj._meta

    if hooks['pre_walk'](obj) is None:
        logging.debug('skipping %s.%s %s' % (
            meta.app_label, meta.module_name, obj._get_pk_val()))
        return

    for field in meta.fields:
        if isinstance(field, models.ForeignKey):
            related_meta = field.rel.to._meta
            related_app = related_meta.app_label
            related_model = related_meta.module_name
            related_pk_val = getattr(obj, field.name+'_id')
            try:
                related_obj = objects.pop((related_app,
                                           related_model,
                                           related_pk_val))
                load_recursive(objects, related_obj, hooks)
            except KeyError:
                logging.debug('probably loaded already: '
                              '%(related_app)s.%(related_model)s '
                              '%(related_pk_val)s' % locals())

    logging.debug('loading %s.%s %s' % (
        meta.app_label,
        meta.module_name,
        obj._get_pk_val()))
    try:
        if hooks['pre_save'](obj) is None:
            logging.debug('skipping %s.%s %s' % (
                meta.app_label, meta.module_name, obj._get_pk_val()))
        else:
            obj.save()

    except Exception, e:
        logging.error('%s while saving %s.%s %s %s' % (
            e, meta.app_label, meta.module_name, obj._get_pk_val(), obj))
        raise


if __name__ == '__main__':
    from optparse import OptionParser
    p = OptionParser()
    p.add_option('-s', '--settings' , action='store', help='Set the Django settings file')
    p.add_option('-d', '--dump', action='store_const', const='dump', dest='action', help='Dump all Django objects into a file')
    p.add_option('-l', '--load', action='store_const', const='load', dest='action', help='Load all Django objects from a file')
    p.add_option('-f', '--file', default='djangodb.pickle', help='Specify file path [djangodb.pickle]')
    p.add_option('-p', '--plugin', help='Use .py plugin for preprocessing objects to load')
    p.add_option('-v', '--verbose' , action='store_const', const=logging.DEBUG, dest='loglevel', help='Show verbose output for debugging')
    p.add_option('-q', '--quiet' , action='store_const', const=logging.FATAL, dest='loglevel', help='No output at all')
    (opts, args) = p.parse_args()
    loglevel = opts.loglevel or logging.INFO
    try:
        # Python 2.4+ syntax
        logging.basicConfig(level=loglevel, format='%(levelname)-8s %(message)s')
    except TypeError:
        # Python 2.3
        logging.basicConfig()

    if opts.settings:
        os.environ['DJANGO_SETTINGS_MODULE'] = opts.settings

    from django.db import transaction, models, backend

    if opts.action == 'dump':
        dump(opts.file)
    elif opts.action == 'load':
        load = transaction.commit_on_success(load)
        load(opts.file, opts.plugin)
    else:
        print 'Please specify --dump or --load'
