# -*- coding:utf-8 -*-
## src/common/logger.py
## Copyright (C) 2003-2014 Yann Leboulanger <asterix AT lagaule.org>
## Copyright (C) 2004-2005 Vincent Hanquez <tab AT snarc.org>
## Copyright (C) 2005-2006 Nikos Kouremenos <kourem AT gmail.com>
## Copyright (C) 2006 Dimitur Kirov <dkirov AT gmail.com>
## Copyright (C) 2006-2008 Jean-Marie Traissard <jim AT lapin.org>
## Copyright (C) 2007 Tomasz Melcer <liori AT exroot.org>
##                    Julien Pivotto <roidelapluie AT gmail.com>
## This file is part of Gajim.
## Gajim is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published
## by the Free Software Foundation; version 3 only.
## Gajim is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with Gajim. If not, see <http://www.gnu.org/licenses/>.

"""
This module provides access to the on-disk database of logs
"""

import os
import sys
import time
import datetime
import calendar
import json
from collections import namedtuple
from gzip import GzipFile
from io import BytesIO
from gi.repository import GLib
from enum import IntEnum, unique

from common import exceptions
from common import gajim
from common import ged

import sqlite3 as sqlite

LOG_DB_PATH = gajim.gajimpaths['LOG_DB']
CACHE_DB_PATH = gajim.gajimpaths['CACHE_DB']

import logging
log = logging.getLogger('gajim.c.logger')

@unique
class JIDConstant(IntEnum):
    """
    Values stored in the ``type`` column of the ``jids`` table
    """
    # Logger.get_jid_id() uses NORMAL_TYPE for every jid that is not a room.
    NORMAL_TYPE = 0
    ROOM_TYPE = 1

@unique
class KindConstant(IntEnum):
    """
    Values stored in the ``kind`` column of the ``logs`` table

    ``str()`` of a member is its bare integer so members can be joined
    directly into the ``IN (...)`` lists of the SQL queries.
    """
    STATUS = 0
    GCSTATUS = 1
    GC_MSG = 2
    SINGLE_MSG_RECV = 3
    CHAT_MSG_RECV = 4
    SINGLE_MSG_SENT = 5
    CHAT_MSG_SENT = 6
    ERROR = 7

    def __str__(self):
        return str(self.value)

@unique
class ShowConstant(IntEnum):
    """
    Values stored in the ``show`` column of the ``logs`` table
    """
    # @unique makes an accidental duplicate value fail at import time instead
    # of silently aliasing two presence states in the database.
    ONLINE = 0
    CHAT = 1
    AWAY = 2
    XA = 3
    DND = 4
    OFFLINE = 5

@unique
class TypeConstant(IntEnum):
    """
    Transport types as stored in the ``transports_cache`` table
    """
    AIM = 0
    GG = 1
    HTTP_WS = 2
    ICQ = 3
    MSN = 4
    QQ = 5
    SMS = 6
    SMTP = 7
    TLEN = 8
    YAHOO = 9
    NEWMAIL = 10
    RSS = 11
    WEATHER = 12
    MRIM = 13
    # Used by the transport type converters for the 'jabber' type.
    NO_TRANSPORT = 14

@unique
class SubscriptionConstant(IntEnum):
    """
    Roster subscription states as integers for the database
    """
    # @unique makes an accidental duplicate value fail at import time.
    NONE = 0
    TO = 1
    FROM = 2
    BOTH = 3

class Logger:
    def __init__(self):
        # Initialise the in-memory state; no connection is held yet (con is
        # None) and no commit is scheduled.
        self.jids_already_in = [] # holds jids that we already have in DB
        self.con = None
        self.commit_timout_id = None

        # NOTE(review): neither `if` below has a statement in its body, and the
        # last line is the tail of a call whose beginning is not present, so
        # this method does not parse as shown. The missing lines must be
        # restored from version control; they cannot be derived from this text.
        if not os.path.exists(LOG_DB_PATH):
            # this can happen only the first time (the time we create the db)
            # db is not created here but in src/common/checks_paths.py
        if not os.path.exists(CACHE_DB_PATH):
            # this can happen cache database is not present when gajim is launched
            # db will be created in src/common/checks_paths.py
            ged.POSTCORE, self._nec_gc_message_received)

    def namedtuple_factory(cursor, row):
        con.row_factory = namedtuple_factory
        fields = [col[0] for col in cursor.description]
        Row = namedtuple("Row", fields)
        named_row = Row(*row)
        if 'additional_data' in fields:
            named_row = named_row._replace(
        return named_row

    def dispatch(self, event, error):
        """
        Raise `event` on the global event dispatcher

        The second argument passed to the dispatcher is always None and the
        third is `error` converted to a string.
        """
        raise_event = gajim.ged.raise_event
        raise_event(event, None, str(error))

    def close_db(self):
        if self.con:
        self.con = None
        self.cur = None

    def open_db(self):
        # Opens the sqlite connection, installs namedtuple_factory as row
        # factory, registers the SQL helper functions and creates self.cur.

        # FIXME: sqlite3_open wants UTF8 strings. So a path with
        # non-ascii chars doesn't work. See #2812 and
        # http://lists.initd.org/pipermail/pysqlite/2005-August/000134.html
        # NOTE(review): `back` is saved but never used in the lines shown;
        # presumably a chdir()/chdir(back) pair around connect() is missing.
        back = os.getcwd()

        # if locked, wait up to 20 sec to unlock
        # before raise (hopefully should be enough)

        # NOTE(review): the connect() call below is never closed (its remaining
        # arguments are missing) and LOG_DB_FILE is not defined in the visible
        # part of this module; confirm both against version control.
        self.con = sqlite.connect(LOG_DB_FILE, timeout=20.0,
        self.con.row_factory = self.namedtuple_factory

        # DB functions
        # "like" takes one SQL argument, "get_timeout" takes none
        self.con.create_function("like", 1, self._like)
        self.con.create_function("get_timeout", 0, self._get_timeout)

        self.cur = self.con.cursor()

    def attach_cache_database(self):
        """
        Attach the cache database to the open connection under the name `cache`

        A failure is only logged at debug level, it is not raised.
        """
        try:
            # ATTACH takes the file name as a quoted literal, so single quotes
            # in the path are doubled to keep the statement well-formed
            self.cur.execute("ATTACH DATABASE '%s' AS cache" % \
                CACHE_DB_PATH.replace("'", "''"))
        except sqlite.Error as e:
            log.debug("Failed to attach cache database: %s" % str(e))

    def set_synchronous(self, sync):
            if sync:
                self.cur.execute("PRAGMA synchronous = NORMAL")
                self.cur.execute("PRAGMA synchronous = OFF")
        except sqlite.Error as e:
            log.debug("Failed to set_synchronous(%s): %s" % (sync, str(e)))

    def init_vars(self):
        # NOTE(review): no body is present for this method in the text shown,
        # so the definition does not parse. Nothing visible in this file
        # establishes what it should do; restore it from version control.

    @staticmethod
    def _get_timeout():
        """
        returns the timeout in epoch

        Reads the 'restore_timeout' config value. A positive value is taken as
        a number of minutes and converted to "now minus that many minutes" in
        seconds since the epoch; any other value is returned unchanged.

        Registered in open_db() as the zero-argument SQL function
        `get_timeout`, hence a staticmethod.
        """
        timeout = gajim.config.get('restore_timeout')

        now = int(time.time())
        if timeout > 0:
            timeout = now - (timeout * 60)
        return timeout

    def _like(search_str):
        return '%{}%'.format(search_str)

    def commit(self):
        except sqlite.OperationalError as e:
            print(str(e), file=sys.stderr)
        self.commit_timout_id = None
        return False

    def _timeout_commit(self):
        if self.commit_timout_id:
        self.commit_timout_id = GLib.timeout_add(500, self.commit)

    def simple_commit(self, sql_to_commit):
        Helper to commit

    def get_jids_already_in_db(self):
            self.cur.execute('SELECT jid FROM jids')
            rows = self.cur.fetchall()
        except sqlite.DatabaseError:
            raise exceptions.DatabaseMalformed(LOG_DB_PATH)
        self.jids_already_in = []
        for row in rows:
            if not row.jid:
                # malformed jid, ignore line

    def get_jids_in_db(self):
        return self.jids_already_in

    def jid_is_from_pm(self, jid):
        If jid is gajim@conf/nkour it's likely a pm one, how we know gajim@conf
        is not a normal guy and nkour is not his resource?  we ask if gajim@conf
        is already in jids (with type room jid) this fails if user disables
        logging for room and only enables for pm (so higly unlikely) and if we
        fail we do not go chaos (user will see the first pm as if it was message
        in room's public chat) and after that all okay
        if jid.find('/') > -1:
            possible_room_jid = jid.split('/', 1)[0]
            return self.jid_is_room_jid(possible_room_jid)
            # it's not a full jid, so it's not a pm one
            return False

    def jid_is_room_jid(self, jid):
        Return True if it's a room jid, False if it's not, None if we don't know
        self.cur.execute('SELECT type FROM jids WHERE jid=?', (jid,))
        row = self.cur.fetchone()
        if row is None:
            return None
            if row.type == JIDConstant.ROOM_TYPE:
                return True
            return False

    @staticmethod
    def _get_family_jids(account, jid):
        """
        Get all jids of the metacontacts family

        :param account: The account

        :param jid:     The JID

        returns a list of JIDs; just [jid] when the jid has no family
        """
        # Called as self._get_family_jids(account, jid) throughout this class,
        # so it must not receive an implicit `self`; hence a staticmethod.
        family = gajim.contacts.get_metacontacts_family(account, jid)
        if family:
            return [user['jid'] for user in family]
        return [jid]

    def get_jid_id(self, jid, typestr=None):
        jids table has jid and jid_id logs table has log_id, jid_id,
        contact_name, time, kind, show, message so to ask logs we need jid_id
        that matches our jid in jids table this method wants jid and returns the
        jid_id for later sql-ing on logs typestr can be 'ROOM' or anything else
        depending on the type of JID and is only needed to be specified when the
        JID is new in DB
        if jid.find('/') != -1: # if it has a /
            jid_is_from_pm = self.jid_is_from_pm(jid)
            if not jid_is_from_pm: # it's normal jid with resource
                jid = jid.split('/', 1)[0] # remove the resource
        if jid in self.jids_already_in: # we already have jids in DB
            self.cur.execute('SELECT jid_id FROM jids WHERE jid=?', [jid])
            row = self.cur.fetchone()
            if row:
                return row.jid_id
        # oh! a new jid :), we add it now
        if typestr == 'ROOM':
            typ = JIDConstant.ROOM_TYPE
            typ = JIDConstant.NORMAL_TYPE
            self.cur.execute('INSERT INTO jids (jid, type) VALUES (?, ?)', (jid,
        except sqlite.IntegrityError:
            # Jid already in DB, maybe added by another instance. re-read DB
            return self.get_jid_id(jid, typestr)
        except sqlite.OperationalError as e:
            raise exceptions.PysqliteOperationalError(str(e))
        jid_id = self.cur.lastrowid
        return jid_id

    def convert_human_values_to_db_api_values(self, kind, show):
        """
        Convert from string style to constant ints for db

        :param kind: One of 'status', 'gcstatus', 'gc_msg',
                     'single_msg_recv', 'single_msg_sent', 'chat_msg_recv',
                     'chat_msg_sent', 'error'

        :param show: One of 'online', 'chat', 'away', 'xa', 'dnd',
                     'offline' or None

        returns a (kind_col, show_col) tuple. show_col is None when `show` is
        None and the string 'UNKNOWN' for any other unrecognised `show`.

        raises ValueError for an unrecognised `kind` (previously this
        surfaced as an UnboundLocalError on the return statement)
        """
        if kind == 'status':
            kind_col = KindConstant.STATUS
        elif kind == 'gcstatus':
            kind_col = KindConstant.GCSTATUS
        elif kind == 'gc_msg':
            kind_col = KindConstant.GC_MSG
        elif kind == 'single_msg_recv':
            kind_col = KindConstant.SINGLE_MSG_RECV
        elif kind == 'single_msg_sent':
            kind_col = KindConstant.SINGLE_MSG_SENT
        elif kind == 'chat_msg_recv':
            kind_col = KindConstant.CHAT_MSG_RECV
        elif kind == 'chat_msg_sent':
            kind_col = KindConstant.CHAT_MSG_SENT
        elif kind == 'error':
            kind_col = KindConstant.ERROR
        else:
            raise ValueError('unknown log kind: %r' % (kind,))

        if show is None:
            show_col = None
        else:
            known_shows = {
                'online': ShowConstant.ONLINE,
                'chat': ShowConstant.CHAT,
                'away': ShowConstant.AWAY,
                'xa': ShowConstant.XA,
                'dnd': ShowConstant.DND,
                'offline': ShowConstant.OFFLINE,
            }
            # invisible in GC when someone goes invisible
            # it's a RFC violation .... but we should not crash
            show_col = known_shows.get(show, 'UNKNOWN')

        return kind_col, show_col

    def convert_human_transport_type_to_db_api_values(self, type_):
        """
        Convert from string style to constant ints for db

        :param type_: The transport type as a string, e.g. 'icq'

        returns the matching TypeConstant member, or None for an unknown
        type. Note that TypeConstant.AIM equals 0, so callers must compare the
        result with `is None` rather than test its truth value.
        """
        # Maps the string to the member *name* so that a member is only
        # looked up when it is actually requested.
        member_names = {
            'aim': 'AIM',
            'gadu-gadu': 'GG',
            'http-ws': 'HTTP_WS',
            'icq': 'ICQ',
            'msn': 'MSN',
            'qq': 'QQ',
            'sms': 'SMS',
            'smtp': 'SMTP',
            'tlen': 'TLEN',
            'x-tlen': 'TLEN',
            'yahoo': 'YAHOO',
            'newmail': 'NEWMAIL',
            'rss': 'RSS',
            'weather': 'WEATHER',
            'mrim': 'MRIM',
            'jabber': 'NO_TRANSPORT',
        }
        member_name = member_names.get(type_)
        if member_name is None:
            return None
        return TypeConstant[member_name]

    def convert_api_values_to_human_transport_type(self, type_id):
        """
        Convert from constant ints for db to string style

        :param type_id: A TypeConstant member or its integer value

        returns the transport type as a string, or None if `type_id` does
        not correspond to a TypeConstant member
        """
        try:
            member = TypeConstant(type_id)
        except ValueError:
            return None
        # Keyed by member name so that a member is only needed when it is
        # actually the one being converted.
        human_names = {
            'AIM': 'aim',
            'GG': 'gadu-gadu',
            'HTTP_WS': 'http-ws',
            'ICQ': 'icq',
            'MSN': 'msn',
            'QQ': 'qq',
            'SMS': 'sms',
            'SMTP': 'smtp',
            'TLEN': 'tlen',
            'YAHOO': 'yahoo',
            'NEWMAIL': 'newmail',
            'RSS': 'rss',
            'WEATHER': 'weather',
            'MRIM': 'mrim',
            'NO_TRANSPORT': 'jabber',
        }
        return human_names.get(member.name)

    def convert_human_subscription_values_to_db_api_values(self, sub):
        """
        Convert from string style to constant ints for db

        :param sub: 'none', 'to', 'from' or 'both'

        returns the matching SubscriptionConstant member, or None for any
        other value
        """
        subscriptions = {
            'none': SubscriptionConstant.NONE,
            'to': SubscriptionConstant.TO,
            'from': SubscriptionConstant.FROM,
            'both': SubscriptionConstant.BOTH,
        }
        return subscriptions.get(sub)

    def convert_db_api_values_to_human_subscription_values(self, sub):
        """
        Convert from constant ints for db to string style

        :param sub: A SubscriptionConstant member or its integer value

        returns 'none', 'to', 'from' or 'both', or None if `sub` does not
        correspond to a SubscriptionConstant member
        """
        try:
            member = SubscriptionConstant(sub)
        except ValueError:
            return None
        human_names = {'NONE': 'none', 'TO': 'to', 'FROM': 'from',
                       'BOTH': 'both'}
        return human_names[member.name]

    def commit_to_db(self, values, write_unread=False):
        sql = '''INSERT INTO logs (jid_id, contact_name, time, kind, show,
                message, subject, additional_data) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'''
            self.cur.execute(sql, values)
        except sqlite.OperationalError as e:
            raise exceptions.PysqliteOperationalError(str(e))
        except sqlite.DatabaseError:
            raise exceptions.DatabaseMalformed(LOG_DB_PATH)
        message_id = None
        if write_unread:
                message_id = self.cur.lastrowid
            except sqlite.OperationalError as e:
                print(str(e), file=sys.stderr)
        if message_id:
            self.insert_unread_events(message_id, values[0])
        return message_id

    def insert_unread_events(self, message_id, jid_id):
        Add unread message with id: message_id
        sql = 'INSERT INTO unread_messages VALUES (%d, %d, 0)' % (message_id,

    def set_read_messages(self, message_ids):
        Mark all messages with ids in message_ids as read
        ids = ','.join([str(i) for i in message_ids])
        sql = 'DELETE FROM unread_messages WHERE message_id IN (%s)' % ids

    def set_shown_unread_msgs(self, msg_log_id):
        Mark unread message as shown un GUI
        sql = 'UPDATE unread_messages SET shown = 1 where message_id = %s' % \

    def reset_shown_unread_messages(self):
        Set shown field to False in unread_messages table
        sql = 'UPDATE unread_messages SET shown = 0'

    def get_unread_msgs(self):
        Get all unread messages
        all_messages = []
                    'SELECT message_id, shown from unread_messages')
            unread_results = self.cur.fetchall()
        except Exception:
            unread_results = []
        for message in unread_results:
            msg_log_id = message.message_id
            shown = message.shown
            # here we get infos for that message, and related jid from jids table
            # do NOT change order of SELECTed things, unless you change function(s)
            # that called this function
                    SELECT logs.log_line_id, logs.message, logs.time, logs.subject,
                    jids.jid, logs.additional_data
                    FROM logs, jids
                    WHERE logs.log_line_id = %d AND logs.jid_id = jids.jid_id
                    ''' % msg_log_id
            results = self.cur.fetchone()
            if len(results) == 0:
                # Log line is no more in logs table. remove it from unread_messages

            all_messages.append((results, shown))
        return all_messages

    def write(self, kind, jid, message=None, show=None, tim=None, subject=None,
              additional_data=None, mam_query=False):
        """
        Write a row (status, gcstatus, message etc) to logs database

        kind can be status, gcstatus, gc_msg, (we only recv for those 3),
        single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent. We
        cannot know if it is pm or normal chat message, we try to guess, see
        jid_is_from_pm()

        We analyze jid and store it as follows:
                jids.jid text column will hold JID if TC-related, room_jid if
                GC-related, ROOM_JID/nick if pm-related.

        :param tim:             Timestamp of the row; the current time is
                                used when it is falsy

        :param additional_data: Dict stored as JSON, defaults to {}

        :param mam_query:       If True a received chat message is not added
                                to the unread messages

        returns what commit_to_db() returns (the new row id for a row
        recorded as unread, else None). Returns None without writing when
        `show` is not recognised. On a database error a 'DB_ERROR' event is
        dispatched and None is returned.
        """

        if additional_data is None:
            additional_data = {}
        if self.jids_already_in == []: # only happens if we just created the db
            # NOTE(review): opening the database here is an assumption based on
            # the comment above - confirm against version control.
            self.open_db()

        contact_name_col = None # holds nickname for kinds gcstatus, gc_msg
        # message holds the message unless kind is status or gcstatus,
        # then it holds status message
        message_col = message
        subject_col = subject
        additional_data_col = json.dumps(additional_data)
        if tim:
            time_col = float(tim)
        else:
            time_col = float(time.time())

        kind_col, show_col = self.convert_human_values_to_db_api_values(kind,
                show)

        write_unread = False
        try:
            # now we may have need to do extra care for some values in columns
            if kind == 'status': # we store (not None) time, jid, show, msg
                # status for roster items
                jid_id = self.get_jid_id(jid)
                if show is None: # show is None (xmpp), but we say that 'online'
                    show_col = ShowConstant.ONLINE

            elif kind == 'gcstatus':
                # status in ROOM (for pm status see status)
                if show is None: # show is None (xmpp), but we say that 'online'
                    show_col = ShowConstant.ONLINE
                jid, nick = jid.split('/', 1)

                # re-get jid_id for the new jid
                jid_id = self.get_jid_id(jid, 'ROOM')
                contact_name_col = nick

            elif kind == 'gc_msg':
                if jid.find('/') != -1: # if it has a /
                    jid, nick = jid.split('/', 1)
                else:
                    # it's server message f.e. error message
                    # when user tries to ban someone but he's not allowed to
                    nick = None

                # re-get jid_id for the new jid
                jid_id = self.get_jid_id(jid, 'ROOM')

                contact_name_col = nick
            else:
                jid_id = self.get_jid_id(jid)
                if kind == 'chat_msg_recv':
                    if not self.jid_is_from_pm(jid) and not mam_query:
                        # Save in unread table only if it's not a pm
                        write_unread = True

            if show_col == 'UNKNOWN': # unknown show, do not log
                return

            values = (jid_id, contact_name_col, time_col, kind_col, show_col,
                    message_col, subject_col, additional_data_col)
            return self.commit_to_db(values, write_unread)

        except (exceptions.DatabaseMalformed,
                exceptions.PysqliteOperationalError) as error:
            self.dispatch('DB_ERROR', error)

    def get_last_conversation_lines(self, account, jid, pending):
        """
        Get recent messages

        Pending messages are already in queue to be printed when the
        ChatControl is opened, so we dont want to request those messages.
        How many messages are requested depends on the 'restore_lines'
        config value. How far back in time messages are requested depends on
        the `get_timeout()` SQL function (see _get_timeout()).

        :param account: The account

        :param jid:     The jid from which we request the conversation lines

        :param pending: How many messages are currently pending so we dont
                        request those messages

        returns a list of namedtuples, newest first; an empty list if
        'restore_lines' is not positive or the query fails
        """

        restore = gajim.config.get('restore_lines')
        if restore <= 0:
            return []

        # NOTE(review): only the first entry of this list is present in the
        # original text; the remaining message kinds are an assumption -
        # confirm against version control.
        kinds = map(str, [KindConstant.SINGLE_MSG_RECV,
                          KindConstant.CHAT_MSG_RECV,
                          KindConstant.SINGLE_MSG_SENT,
                          KindConstant.CHAT_MSG_SENT,
                          KindConstant.ERROR])

        jids = self._get_family_jids(account, jid)

        sql = '''
            SELECT time, kind, message, subject, additional_data
            FROM logs NATURAL JOIN jids WHERE jid IN ({jids}) AND
            kind IN ({kinds}) AND time > get_timeout()
            ORDER BY time DESC, log_line_id DESC LIMIT ? OFFSET ?
            '''.format(jids=', '.join('?' * len(jids)),
                       kinds=', '.join(kinds))

        try:
            messages = self.con.execute(
                sql, (*jids, restore, pending)).fetchall()
        except sqlite.DatabaseError:
            return []

        return messages

    def get_unix_time_from_date(self, year, month, day):
        """
        Return the epoch second at which the given local date starts

        :param year:  The year (fe 2005)

        :param month: The month (fe 11)

        :param day:   The day (fe 25)

        returns seconds since the epoch as an int
        """
        # timetuple() is compatible with time.localtime(), which is what
        # time.mktime() expects
        midnight = datetime.date(year, month, day).timetuple()
        return int(time.mktime(midnight))

    def get_conversation_for_date(self, account, jid, date):
        Load the complete conversation with a given jid on a specific date

        :param account: The account

        :param jid:     The jid for which we request the conversation

        :param date:    datetime.datetime instance
                        example: datetime.datetime(year, month, day)

        returns a list of namedtuples

        jids = self._get_family_jids(account, jid)

        delta = datetime.timedelta(
            hours=23, minutes=59, seconds=59, microseconds=999999)

        sql = '''
            SELECT contact_name, time, kind, show, message, subject,
                   additional_data, log_line_id
            FROM logs NATURAL JOIN jids WHERE jid IN ({jids})
            AND time BETWEEN ? AND ?
            ORDER BY time, log_line_id
            '''.format(jids=', '.join('?' * len(jids)))

        return self.con.execute(sql, (*jids, 
                                      (date + delta).timestamp())).fetchall()

    def search_log(self, account, jid, query, date=None):
        Search the conversation log for messages containing the `query` string.

        The search can either span the complete log for the given
        `account` and `jid` or be restriced to a single day by
        specifying `date`.

        :param account: The account

        :param jid:     The jid for which we request the conversation

        :param query:   A search string

        :param date:    datetime.datetime instance
                        example: datetime.datetime(year, month, day)

        returns a list of namedtuples
        jids = self._get_family_jids(account, jid)

        if date:
            delta = datetime.timedelta(
                hours=23, minutes=59, seconds=59, microseconds=999999)

            between = '''
                AND time BETWEEN {start} AND {end}
                           end=(date + delta).timestamp())

        sql = '''
        SELECT contact_name, time, kind, show, message, subject,
               additional_data, log_line_id
        FROM logs NATURAL JOIN jids WHERE jid IN ({jids})
        AND message LIKE like(?) {date_search}
        ORDER BY time, log_line_id
        '''.format(jids=', '.join('?' * len(jids)),
                   date_search=between if date else '')

        return self.con.execute(sql, (*jids, query)).fetchall()

    def get_days_with_logs(self, account, jid, year, month):
        """
        Request the days in a month where we received messages
        for a given `jid`.

        :param account: The account

        :param jid:     The jid for which we request the days

        :param year:    The year

        :param month:   The month

        returns a list of namedtuples, each with a `day` field
        """
        jids = self._get_family_jids(account, jid)

        # Status rows are not messages, so they do not count as "logs".
        # NOTE(review): only STATUS is present in the original text; GCSTATUS
        # is an assumption - confirm against version control.
        kinds = map(str, [KindConstant.STATUS,
                          KindConstant.GCSTATUS])

        # Calculate the start and end datetime of the month
        date = datetime.datetime(year, month, 1)
        days = calendar.monthrange(year, month)[1] - 1
        delta = datetime.timedelta(
            days=days, hours=23, minutes=59, seconds=59, microseconds=999999)

        sql = """
            SELECT DISTINCT 
            CAST(strftime('%d', time, 'unixepoch', 'localtime') AS INTEGER)
            AS day FROM logs NATURAL JOIN jids WHERE jid IN ({jids})
            AND time BETWEEN ? AND ?
            AND kind NOT IN ({kinds})
            ORDER BY time
            """.format(jids=', '.join('?' * len(jids)),
                       kinds=', '.join(kinds))

        return self.con.execute(sql, (*jids,
                                      date.timestamp(),
                                      (date + delta).timestamp())).fetchall()

    def get_last_date_that_has_logs(self, account, jid):
        """
        Get the timestamp of the last message we received for the jid.

        :param account: The account

        :param jid:     The jid for which we request the last timestamp

        returns a timestamp or None
        """
        jids = self._get_family_jids(account, jid)

        # Status rows are not messages.
        # NOTE(review): only STATUS is present in the original text; GCSTATUS
        # is an assumption - confirm against version control.
        kinds = map(str, [KindConstant.STATUS,
                          KindConstant.GCSTATUS])

        sql = '''
            SELECT MAX(time) as time FROM logs
            NATURAL JOIN jids WHERE jid IN ({jids})
            AND kind NOT IN ({kinds})
            '''.format(jids=', '.join('?' * len(jids)),
                       kinds=', '.join(kinds))

        # fetchone() returns always at least one Row with all
        # attributes set to None because of the MAX() function
        return self.con.execute(sql, (*jids,)).fetchone().time

    def get_room_last_message_time(self, account, jid):
        Get the timestamp of the last message we received in a room.

        :param account: The account

        :param jid:     The jid for which we request the last timestamp

        returns a timestamp or None
        sql = '''
            SELECT time FROM rooms_last_message_time
            NATURAL JOIN jids WHERE jid = ?

        row = self.con.execute(sql, (jid,)).fetchone()
        if not row:
            return self.get_last_date_that_has_logs(account, jid)
        return row.time

    def set_room_last_message_time(self, jid, time):
        Set last time (in seconds since EPOCH) for which we had logs for that
        room in rooms_last_message_time table
        jid_id = self.get_jid_id(jid, 'ROOM')
        # jid_id is unique in this table, create or update :
        sql = 'REPLACE INTO rooms_last_message_time VALUES (%d, %d)' % \
                (jid_id, time)

    def save_transport_type(self, jid, type_):
        Save the type of the transport in DB
        type_id = self.convert_human_transport_type_to_db_api_values(type_)
        if not type_id:
            # unknown type
                'SELECT type from transports_cache WHERE transport = "%s"' % jid)
        results = self.cur.fetchone()
        if results:
            if results.type == type_id:
            sql = 'UPDATE transports_cache SET type = %d WHERE transport = "%s"' %\
                    (type_id, jid)
        sql = 'INSERT INTO transports_cache VALUES ("%s", %d)' % (jid, type_id)

    def get_transports_type(self):
        Return all the type of the transports in DB
                'SELECT * from transports_cache')
        results = self.cur.fetchall()
        if not results:
            return {}
        answer = {}
        for result in results:
            answer[result.transport] = self.convert_api_values_to_human_transport_type(
        return answer

    # A longer note here:
    # The database contains a blob field. Pysqlite seems to need special care for
    # such fields.
    # When storing, we need to convert string into buffer object (1).
    # When retrieving, we need to convert it back to a string to decompress it.
    # (2)
    # GzipFile needs a file-like object; BytesIO emulates a file for in-memory bytes
    def iter_caps_data(self):
        Iterate over caps cache data stored in the database

        The iterator values are pairs of (node, ver, ext, identities, features):
        identities == {'category':'foo', 'type':'bar', 'name':'boo'},
        features being a list of feature namespaces.
        # get data from table
        # the data field contains binary object (gzipped data), this is a hack
        # to get that data without trying to convert it to unicode
            self.cur.execute('SELECT hash_method, hash, data FROM caps_cache;')
        except sqlite.OperationalError:
            # might happen when there's no caps_cache table yet
            # -- there's no data to read anyway then

        # list of corrupted entries that will be removed
        to_be_removed = []
        for row in self.cur:
            # for each row: unpack the data field
            # (format: (category, type, name, category, type, name, ...
            #   ..., 'FEAT', feature1, feature2, ...).join(' '))
            # NOTE: if there's a need to do more gzip, put that to a function
                data = GzipFile(fileobj=BytesIO(row.data)).read().decode('utf-8').split('\0')
            except IOError:
                # This data is corrupted. It probably contains non-ascii chars
                to_be_removed.append((row.hash_method, row.hash))
            i = 0
            identities = list()
            features = list()
            while i < (len(data) - 3) and data[i] != 'FEAT':
                category = data[i]
                type_ = data[i + 1]
                lang = data[i + 2]
                name = data[i + 3]
                identities.append({'category': category, 'type': type_,
                        'xml:lang': lang, 'name': name})
                i += 4
            while i < len(data):
                i += 1

            # yield the row
            yield row.hash_method, row.hash, identities, features
        for hash_method, hash_ in to_be_removed:
            sql = '''DELETE FROM caps_cache WHERE hash_method = "%s" AND
                    hash = "%s"''' % (hash_method, hash_)

    def add_caps_entry(self, hash_method, hash_, identities, features):
        data = []
        for identity in identities:
            # there is no FEAT category
            if identity['category'] == 'FEAT':
            data.extend((identity.get('category'), identity.get('type', ''),
                    identity.get('xml:lang', ''), identity.get('name', '')))
        data = '\0'.join(data)
        # if there's a need to do more gzip, put that to a function
        string = BytesIO()
        gzip = GzipFile(fileobj=string, mode='w')
        data = string.getvalue()
                INSERT INTO caps_cache ( hash_method, hash, data, last_seen )
                VALUES (?, ?, ?, ?);
                ''', (hash_method, hash_, memoryview(data), int(time.time())))
        # (1) -- note above

    def update_caps_time(self, method, hash_):
        sql = '''UPDATE caps_cache SET last_seen = %d
                WHERE hash_method = "%s" and hash = "%s"''' % \
                (int(time.time()), method, hash_)

    def clean_caps_table(self):
        Remove caps which was not seen for 3 months
        sql = '''DELETE FROM caps_cache WHERE last_seen < %d''' % \
                int(time.time() - 3*30*24*3600)

    def replace_roster(self, account_name, roster_version, roster):
        # NOTE(review): the five lines below are docstring text whose
        # triple-quote delimiters are missing, so this method does not parse
        # as shown.
        Replace current roster in DB by a new one

        accout_name is the name of the account to change.
        roster_version is the version of the new roster.
        roster is the new version.
        # First we must reset roster_version value to ensure that the server
        # sends back all the roster at the next connection if the replacement
        # didn't work properly.
        gajim.config.set_per('accounts', account_name, 'roster_version', '')

        account_jid = gajim.get_jid_from_account(account_name)
        # NOTE(review): account_jid_id is not used by any line shown here
        account_jid_id = self.get_jid_id(account_jid)

        # Delete old roster
        # NOTE(review): no statement follows the comment above; the call that
        # deletes the old roster is missing and is not identifiable from the
        # visible part of this file.

        # Fill roster tables with the new roster
        # (commit=False: the entries are not committed one by one)
        for jid in roster:
            self.add_or_update_contact(account_jid, jid, roster[jid]['name'],
                roster[jid]['subscription'], roster[jid]['ask'],
                roster[jid]['groups'], commit=False)

        # At this point, we are sure the replacement works properly so we can
        # set the new roster_version value.
        # NOTE(review): the call below is cut off before its last argument
        # (presumably roster_version) and closing parenthesis - confirm.
        gajim.config.set_per('accounts', account_name, 'roster_version',

    def del_contact(self, account_jid, jid):
        """
        Remove jid from account_jid roster

        Both the group memberships and the roster entry of the contact are
        deleted. Raises exceptions.PysqliteOperationalError when a jid id
        lookup fails with that error.
        """
        try:
            account_jid_id = self.get_jid_id(account_jid)
            jid_id = self.get_jid_id(jid)
        except exceptions.PysqliteOperationalError as e:
            raise exceptions.PysqliteOperationalError(str(e))
        self.cur.execute(
                'DELETE FROM roster_group WHERE account_jid_id=? AND jid_id=?',
                (account_jid_id, jid_id))
        self.cur.execute(
                'DELETE FROM roster_entry WHERE account_jid_id=? AND jid_id=?',
                (account_jid_id, jid_id))
        self.con.commit()

    def add_or_update_contact(self, account_jid, jid, name, sub, ask, groups,
            commit=True):
        """
        Add or update a contact from account_jid roster

        A sub of 'remove' deletes the contact instead. When commit is False
        the caller is responsible for committing the transaction.
        Raises exceptions.PysqliteOperationalError when a jid id lookup fails
        with that error.
        """
        if sub == 'remove':
            self.del_contact(account_jid, jid)
            return

        try:
            account_jid_id = self.get_jid_id(account_jid)
            jid_id = self.get_jid_id(jid)
        except exceptions.PysqliteOperationalError as e:
            raise exceptions.PysqliteOperationalError(str(e))

        # Update groups information
        # First we delete all previous groups information
        self.cur.execute(
                'DELETE FROM roster_group WHERE account_jid_id=? AND jid_id=?',
                (account_jid_id, jid_id))
        # Then we add all new groups information
        for group in groups:
            self.cur.execute('INSERT INTO roster_group VALUES(?, ?, ?)',
                    (account_jid_id, jid_id, group))

        if name is None:
            name = ''

        # NOTE(review): the subscription value is presumably stored in its DB
        # representation through the class's converter, which is defined
        # outside this part of the file -- confirm the helper name.
        # ask is stored as a flag: get_roster only tests it for truthiness.
        self.cur.execute('REPLACE INTO roster_entry VALUES(?, ?, ?, ?, ?)',
                (account_jid_id, jid_id, name,
                self.convert_human_subscription_values_to_db_api_values(sub),
                bool(ask)))
        if commit:
            self.con.commit()

    def get_roster(self, account_jid):
        """
        Return the account_jid roster in NonBlockingRoster format

        The result maps each contact jid to a dict with the keys 'name',
        'subscription', 'groups', 'resources' and 'ask'.
        """
        data = {}
        account_jid_id = self.get_jid_id(account_jid)

        # First we fill data with roster_entry information
        self.cur.execute('''
                SELECT j.jid, re.jid_id, re.name, re.subscription, re.ask
                FROM roster_entry re, jids j
                WHERE re.account_jid_id=? AND j.jid_id=re.jid_id''', (account_jid_id,))
        for row in self.cur:
            # NOTE(review): the stored subscription is presumably converted
            # back through the class's converter, which is defined outside
            # this part of the file -- confirm the helper name.
            subscription = \
                    self.convert_db_api_values_to_human_subscription_values(
                    row.subscription)
            data[row.jid] = {
                'name': row.name if row.name else None,
                'subscription': subscription,
                'groups': [],
                'resources': {},
                'ask': 'subscribe' if row.ask else None,
                # Only kept until the groups of the entry have been fetched.
                'id': row.jid_id,
            }

        # Then we add group for roster entries
        for entry in data.values():
            self.cur.execute('''
                    SELECT group_name FROM roster_group
                    WHERE account_jid_id=? AND jid_id=?''',
                    (account_jid_id, entry.pop('id')))
            entry['groups'].extend(row.group_name for row in self.cur)

        return data

    def remove_roster(self, account_jid):
        """
        Remove all entries from account_jid roster

        Deletes both the roster entries and the group memberships stored for
        the account.
        """
        account_jid_id = self.get_jid_id(account_jid)

        self.cur.execute('DELETE FROM roster_entry WHERE account_jid_id=?',
                (account_jid_id,))
        self.cur.execute('DELETE FROM roster_group WHERE account_jid_id=?',
                (account_jid_id,))
        self.con.commit()

    def save_if_not_exists(self, with_, direction, tim, msg='', is_pm=False, additional_data=None):
        """
        Store a message in the logs unless an identical one is already there

        A message counts as already logged when a row with the same jid and
        the same text exists within 5 minutes around tim. Empty messages are
        ignored.

        with_ is the jid the message was exchanged with.
        direction is 'from' for a received and 'to' for a sent message; it is
        only looked at when is_pm is False.
        is_pm makes the message be logged with the 'gc_msg' type.
        additional_data is handed over to write(); defaults to an empty dict.
        """
        if additional_data is None:
            additional_data = {}

        if not msg:
            return
        if is_pm:
            type_ = 'gc_msg'
        else:
            if direction == 'from':
                type_ = 'chat_msg_recv'
            elif direction == 'to':
                type_ = 'chat_msg_sent'

        start_time = tim - 300 # 5 minutes around given time
        end_time = tim + 300 # 5 minutes around given time

        sql = '''
            SELECT * FROM logs
            NATURAL JOIN jids WHERE jid = ? AND message = ?
            AND time BETWEEN ? AND ?
            '''

        result = self.con.execute(sql, (str(with_), msg, start_time, end_time)).fetchone()

        if result:
            log.debug('Log already in DB, ignoring it')
            return
        log.debug('New log received from server archives, storing it')
        self.write(type_, str(with_), message=msg, tim=tim,
                   additional_data=additional_data, mam_query=True)

    def _nec_gc_message_received(self, obj):
        """
        Log a received groupchat message and remember its timestamp

        Nothing is written when logging is disabled for the room, when the
        message is older than the last one logged for the room, or when it
        has no text or no nick.
        """
        tim_f = float(obj.timestamp)
        tim_int = int(tim_f)
        if not gajim.config.should_log(obj.conn.name, obj.jid):
            return
        if tim_int < obj.conn.last_history_time[obj.jid]:
            return
        # if not obj.nick, it means message comes from room itself
        # usually it holds description and can be sent at each connection
        # so don't store it in logs
        if not obj.msgtxt or not obj.nick:
            return
        self.write('gc_msg', obj.fjid, obj.msgtxt, tim=obj.timestamp, additional_data=obj.additional_data)
        # store in memory time of last message logged.
        # this will also be saved in rooms_last_message_time table
        # when we quit this muc
        obj.conn.last_history_time[obj.jid] = tim_f