2005-11-22 00:33:09 +01:00
|
|
|
#!/usr/bin/env python
|
2005-11-22 00:19:31 +01:00
|
|
|
import os
|
|
|
|
import sre
|
2005-11-22 15:52:04 +01:00
|
|
|
import sys
|
2005-11-24 14:27:09 +01:00
|
|
|
import time
|
|
|
|
import signal
|
|
|
|
|
|
|
|
# Install the default action (SIG_DFL) for SIGINT instead of Python's handler
signal.signal(signal.SIGINT, signal.SIG_DFL) # ^C exits the application
|
2005-11-22 00:19:31 +01:00
|
|
|
|
|
|
|
from pysqlite2 import dbapi2 as sqlite
|
|
|
|
|
2005-11-26 00:23:25 +01:00
|
|
|
|
|
|
|
class Constants:
    '''Integer codes, exposed as instance attributes.

    Each group below is numbered from 0 in the order listed:
    JID_*  -- kind of JID (stored in jids.type by visit())
    KIND_* -- kind of logged line (stored in logs.kind by visit())
    SHOW_* -- presence states (defined here; not referenced elsewhere in
              this script)
    '''

    def __init__(self):
        groups = (
            ('JID_NORMAL_TYPE', 'JID_ROOM_TYPE'),
            ('KIND_STATUS', 'KIND_GCSTATUS', 'KIND_GC_MSG',
             'KIND_SINGLE_MSG_RECV', 'KIND_CHAT_MSG_RECV',
             'KIND_SINGLE_MSG_SENT', 'KIND_CHAT_MSG_SENT'),
            ('SHOW_ONLINE', 'SHOW_CHAT', 'SHOW_AWAY', 'SHOW_XA',
             'SHOW_DND', 'SHOW_OFFLINE'),
        )
        for names in groups:
            # position inside the group is the code
            for code, name in enumerate(names):
                setattr(self, name, code)
|
|
|
|
|
|
|
|
# shared instance; visit() reads its JID_* and KIND_* codes
constants = Constants()
|
|
|
|
|
2005-11-22 12:21:48 +01:00
|
|
|
# Where the old plain-text logs live (PATH_TO_LOGS_BASE_DIR) and where the
# new database, called logs.db, is created (PATH_TO_DB)
if os.name != 'nt':
    PATH_TO_LOGS_BASE_DIR = os.path.expanduser('~/.gajim/logs')
    PATH_TO_DB = os.path.expanduser('~/.gajim/logs.db')
else:
    try:
        PATH_TO_LOGS_BASE_DIR = os.path.join(os.environ['appdata'], 'Gajim',
            'Logs')
        PATH_TO_DB = os.path.join(os.environ['appdata'], 'Gajim', 'logs.db')
    except KeyError:
        # win9x: no 'appdata' in the environment, use paths relative to
        # the current directory
        PATH_TO_LOGS_BASE_DIR, PATH_TO_DB = '../src/Logs', '../src/logs.db'
|
2005-11-22 00:19:31 +01:00
|
|
|
|
2005-11-22 15:52:04 +01:00
|
|
|
# Refuse to touch a database that is already there: report it and stop
if os.path.exists(PATH_TO_DB):
    print('%s already exists. Exiting..' % PATH_TO_DB)
    sys.exit()
|
2005-11-26 00:23:25 +01:00
|
|
|
|
2005-11-22 12:21:48 +01:00
|
|
|
# JIDs that already have a row in the jids table
jids_already_in = []

# Open the database, then restrict the file to its owner
# (mode 0600 octal: read/write only for us)
con = sqlite.connect(PATH_TO_DB)
os.chmod(PATH_TO_DB, int('600', 8))

# single cursor used for every statement of the migration
cur = con.cursor()
|
|
|
|
# Create the two tables of the new database.
#
# logs.kind is one of: status, gcstatus, gc_msg (we only receive those 3),
# single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent.
#
# logs.jid_id refers to jids.jid_id; no foreign key is declared in the
# schema, the link is maintained by the Python code of this script.
cur.executescript('''
    CREATE TABLE jids(
        jid_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
        jid TEXT UNIQUE,
        type INTEGER
    );

    CREATE TABLE logs(
        log_line_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
        jid_id INTEGER,
        contact_name TEXT,
        time INTEGER,
        kind INTEGER,
        show INTEGER,
        message TEXT,
        subject TEXT
    );
    ''')

con.commit()
|
|
|
|
|
2005-11-22 00:47:22 +01:00
|
|
|
# (?<!\\) is a negative lookbehind assertion: the regexp that follows it
# matches only where it is not immediately preceded by a '\'
|
|
|
|
# Compiled pattern for a backslash followed by 'n' that has no backslash in
# front of it. NOTE: the binding is called `re`, the same name as the standard
# library module, which this file does not import.
re = sre.compile(r'(?<!\\)\\n')
|
2005-11-22 00:19:31 +01:00
|
|
|
|
2005-11-22 00:47:22 +01:00
|
|
|
def from_one_line(msg):
    r'''Undo the one-line escaping used in the old log files.

    In the escaped text a doubled backslash stands for one literal
    backslash, and a backslash followed by 'n' stands for a newline.
    Escapes are paired from left to right, so an escaped backslash that is
    directly followed by an escaped newline decodes correctly.

    Examples (characters as stored -> decoded characters):
        test\ntest     -> test, newline, test
        test\\ntest    -> test\ntest   (literal backslash, then 'n')
        test\\\ntest   -> test, backslash, newline, test

    msg -- the escaped string
    Returns the decoded string.
    '''
    # Splitting on the doubled backslash first consumes every escaped
    # backslash; any backslash + 'n' left inside a piece is a real
    # escaped newline.
    pieces = msg.split('\\\\')
    return '\\'.join([piece.replace('\\n', '\n') for piece in pieces])
|
|
|
|
|
|
|
|
|
|
|
|
def get_jid(dirname, filename):
    '''Build the value of the jids.jid column from a log file location.

    The name is taken from the file and its immediate folder:
    - file directly in the logs base dir ('logs' or 'Logs'): the file name
      is the JID
    - file named like its folder (room/room): the folder name is the
      room JID
    - any other file in a room folder (room/nick): private message,
      the result is ROOM_JID/nick

    Only the last component of dirname is used, so the result does not
    depend on the path being absolute or on the path separator of the
    platform.

    dirname  -- directory that contains the log file
    filename -- name of the log file inside dirname
    Returns the JID, lower-cased.
    '''
    folder = os.path.basename(dirname)
    if folder in ('logs', 'Logs'):
        # we have file (not dir) in logs base dir
        jid = filename
    elif filename == folder:
        # room/room: messages of the room itself
        jid = folder
    else:
        # room/nick: it's pm
        jid = folder + '/' + filename
    return jid.lower()
|
|
|
|
|
2005-11-26 12:55:55 +01:00
|
|
|
def decode_jid(string):
    '''Try to decode (to make it Unicode instance) given jid.

    The filesystem encoding is tried first, then utf-8, then iso-8859-15.
    By the time iso-8859-15 is reached it had better be the right one,
    otherwise bad characters will be shown.

    string -- the jid, normally a byte string taken from a file name
    Returns the Unicode jid, or None if no encoding could decode it.
    '''
    if isinstance(string, type(u'')):
        # already Unicode, nothing to decode
        return string

    candidates = (sys.getfilesystemencoding(), 'utf-8', 'iso-8859-15')
    for encoding in candidates:
        if not encoding:
            # getfilesystemencoding() may report no specific encoding
            continue
        try:
            return string.decode(encoding)
        except (UnicodeError, LookupError):
            # wrong encoding for these bytes, or codec not available
            continue

    return None
|
|
|
|
|
2005-11-22 00:19:31 +01:00
|
|
|
def _parse_log_line(line):
    '''Convert one line of an old log file into the values of a logs row.

    A line is 'date:type:data...'. The second field selects the kind:
        gc       -> KIND_GC_MSG        data is nick:message
        gcstatus -> KIND_GCSTATUS      data is nick:show:status message
        recv     -> KIND_CHAT_MSG_RECV data is message
        sent     -> KIND_CHAT_MSG_SENT data is message
        other    -> KIND_STATUS        data is show:status message

    Returns (contact_name, tim, kind, show, message), or None when the
    line is malformed or has an empty message (such lines are dropped).
    '''
    # remove the line terminator, if there is one, before decoding
    if line.endswith('\n'):
        line = line[:-1]

    fields = from_one_line(line).split(':')
    if len(fields) <= 2:
        return None

    # first field is the date; some lines can be broken, just drop them
    try:
        tim = int(float(fields[0]))
    except (ValueError, OverflowError):
        return None

    typ = fields[1] # type of logged message
    message_data = fields[2:] # at least one element at this point
    contact_name = None
    show = None

    if typ == 'gc':
        contact_name = message_data[0]
        message = ':'.join(message_data[1:])
        kind = constants.KIND_GC_MSG
    elif typ == 'gcstatus':
        if len(message_data) < 2:
            # no show field: broken line
            return None
        contact_name = message_data[0]
        show = message_data[1]
        message = ':'.join(message_data[2:]) # status msg
        kind = constants.KIND_GCSTATUS
    elif typ == 'recv':
        message = ':'.join(message_data)
        kind = constants.KIND_CHAT_MSG_RECV
    elif typ == 'sent':
        message = ':'.join(message_data)
        kind = constants.KIND_CHAT_MSG_SENT
    else: # status
        kind = constants.KIND_STATUS
        show = message_data[0]
        message = ':'.join(message_data[1:]) # status msg

    if not message:
        return None

    # NOTE(review): message is stored as read from the file (not decoded)
    # and show is stored as the text found in the log; the SHOW_* codes of
    # Constants are not applied here -- confirm this is intended.
    return contact_name, tim, kind, show, message


def _get_jid_id(jid, jid_type):
    '''Return the jids.jid_id of jid, inserting a new jids row if needed.'''
    if jid in jids_already_in:
        # jid is already in the DB, don't create a new row
        cur.execute('SELECT jid_id FROM jids WHERE jid = ?', (jid,))
        return cur.fetchone()[0]

    jids_already_in.append(jid)
    cur.execute('INSERT INTO jids (jid, type) VALUES (?, ?)', (jid, jid_type))
    con.commit()
    return cur.lastrowid


def visit(arg, dirname, filenames):
    '''Import every log file of one directory into the database.

    Written as an os.path.walk callback.

    arg       -- ignored
    dirname   -- directory being visited
    filenames -- names found in dirname (sub-directories are skipped)

    For each file the jid is derived from its location, a jids row is
    created on the first usable line, and each usable line becomes a logs
    row. Inserted rows are committed once per file.
    '''
    print('Visiting %s' % dirname)

    sql = 'INSERT INTO logs (jid_id, contact_name, time, kind, show, message) '\
        'VALUES (?, ?, ?, ?, ?, ?)'

    for filename in filenames:
        # notify.log is dup info (notifications are also in contact log
        # file); README is the note this script writes in the base dir
        if filename in ('notify.log', 'readme', 'README'):
            continue

        path_to_text_file = os.path.join(dirname, filename)
        if os.path.isdir(path_to_text_file):
            continue

        jid = decode_jid(get_jid(dirname, filename))
        if not jid:
            continue

        # gajim@conf/gajim@conf then gajim@conf is type room
        if filename == os.path.basename(dirname):
            jid_type = constants.JID_ROOM_TYPE
            print('Processing %s of type room' % jid.encode('utf-8'))
        else:
            jid_type = constants.JID_NORMAL_TYPE
            print('Processing %s of type normal' % jid.encode('utf-8'))

        f = open(path_to_text_file, 'r')
        try:
            lines = f.readlines()
        finally:
            f.close()

        jid_id = None # looked up or created on the first usable line
        for line in lines:
            parsed = _parse_log_line(line)
            if parsed is None:
                continue

            if jid_id is None:
                jid_id = _get_jid_id(jid, jid_type)

            contact_name, tim, kind, show, message = parsed
            cur.execute(sql,
                (jid_id, contact_name, tim, kind, show, message))

        con.commit()
|
|
|
|
|
|
|
|
# Script entry point: warn the user, wait, migrate every log directory, then
# leave a README in the old logs directory explaining what happened.
if __name__ == '__main__':
    print('IMPORTANT: PLEASE READ http://trac.gajim.org/wiki/MigrateLogToDot9DB')
    print('Migration will start in 40 seconds unless you press Ctrl+C')
    time.sleep(40) # give the user time to act
    print('')
    print('Starting Logs Migration')
    print('=======================')
    print('Please do NOT run Gajim until this script is over')

    # visit() is called once per directory under the logs base dir
    os.path.walk(PATH_TO_LOGS_BASE_DIR, visit, None)

    f = open(os.path.join(PATH_TO_LOGS_BASE_DIR, 'README'), 'w')
    try:
        f.writelines((
            'We do not use plain-text files anymore, because they do not scale.\n',
            'Those files here are logs for Gajim up until 0.8.2\n',
            'We now use an sqlite database called logs.db found in ~/.gajim\n',
            'You can always run the migration script to import your old logs to the database\n',
            'Thank you\n',
        ))
    finally:
        # close the file even if a write fails
        f.close()
|