"""Automatic generation of chat using Lorem Ipsum text and time series statistics."""
import os
from datetime import datetime, timedelta
import itertools
import numpy as np
import pandas as pd
from scipy.stats import lomax
from lorem import sentence
from emoji.unicode_codes import EMOJI_UNICODE
from whatstk.whatsapp.objects import WhatsAppChat
from whatstk.whatsapp.hformat import get_supported_hformats_as_list
from whatstk.utils.utils import COLNAMES_DF
# Default participant names, used by ChatGenerator when no ``users`` list is supplied.
USERS = ['John', 'Mary', 'Giuseppe', '+1 123 456 789']
class ChatGenerator:
    """Generate a chat.

    Messages are Lorem Ipsum sentences (optionally followed by an emoji), senders are sampled from ``users`` and
    the gaps between consecutive messages are drawn from a Lomax distribution.

    Args:
        size (int): Number of messages to generate.
        users (list, optional): List with names of the users. Defaults to module variable USERS.
        seed (int, optional): Seed for random processes. Defaults to 100.

    Examples:
        This simple example generates a chat, returned as a
        :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`. Once generated, we can access its attribute
        ``df``, which contains the chat as a DataFrame.

        .. code-block:: python

            >>> from whatstk.whatsapp.generation import ChatGenerator
            >>> from datetime import datetime
            >>> chat = ChatGenerator(size=10).generate(last_timestamp=datetime(2020, 1, 1, 0, 0))
            >>> chat.df.head(5)
                                              username                                            message
            date
            2019-12-31 09:43:04.000525            John         Quis labore laboris proident et deserunt.
            2019-12-31 10:19:21.980039  +1 123 456 789               Non ullamco esse nulla voluptate. 🇩🇰
            2019-12-31 13:56:45.575426            John  Duis non ut officia, enim enim qui cupidatat a...
            2019-12-31 15:47:29.995420        Giuseppe               Non ut nulla laboris nostrud aute. 🏊🏻
            2019-12-31 16:23:00.348542            John                     Tempor irure in velit tempor.

    """

    def __init__(self, size, users=None, seed=100):
        """Instantiate ChatGenerator class.

        Note that this seeds NumPy's *global* random state as a side effect.

        Args:
            size (int): Number of messages to generate.
            users (list, optional): List with names of the users. Defaults to module variable USERS.
            seed (int, optional): Seed for random processes. Defaults to 100.

        """
        self.size = size
        # An empty or missing user list falls back to the module-level defaults.
        self.users = users or USERS
        self.seed = seed
        np.random.seed(seed=self.seed)

    def _generate_messages(self):
        """Generate list of messages.

        To generate sentences, Lorem Ipsum is used. Each sentence is followed by a space and a randomly drawn
        emoji, which may be the empty string.

        Returns:
            list: List with messages (as strings).

        """
        emojis = self._generate_emojis()
        # NOTE(review): `lorem.sentence` presumably does not draw from NumPy's random state, so the sentences
        # may not be reproducible through `seed` -- confirm against the lorem package.
        sentence_iter = sentence(count=self.size, comma=(0, 2), word_range=(4, 8))
        sentences = itertools.islice(sentence_iter, self.size)
        return [text + ' ' + emoji for text, emoji in zip(sentences, emojis)]

    def _generate_emojis(self, k=1):
        """Generate random list of emojis.

        Emojis are sampled from a list of `n` emojis and `k*n` empty strings, so a larger ``k`` makes messages
        without emoji more likely.

        Args:
            k (int, optional): Number of empty strings added per available emoji. Defaults to 1.

        Returns:
            numpy.ndarray: Array of length ``size`` with emojis (or empty strings).

        """
        emojis = list(EMOJI_UNICODE.values())
        candidates = emojis + [''] * (k * len(emojis))
        return np.random.choice(candidates, self.size)

    def _generate_timestamps(self, last=None):
        """Generate list of timestamps.

        Gaps between consecutive messages (in minutes) are drawn from a Lomax distribution and accumulated
        backwards from ``last``.

        Args:
            last (datetime, optional): Datetime of last message. If ``None``, defaults to current date
                                        (truncated to whole seconds).

        Returns:
            list: List with timestamps in ascending order; the final element is ``last``. Empty if ``size``
            is smaller than 1.

        """
        if self.size < 1:
            # Nothing to generate; also avoids asking scipy for a negative number of samples.
            return []
        if last is None:
            last = datetime.now().replace(microsecond=0)
        # Parameters of the Lomax distribution of the gaps; `loc` shifts it so every gap is at least 30 minutes.
        c = 1.0065
        scale = 40.06
        loc = 30
        gaps = lomax.rvs(c=c, loc=loc, scale=scale, size=self.size - 1, random_state=self.seed)
        # Offsets (in minutes) before `last`; the last message itself has offset 0.
        offsets = [0] + gaps.cumsum().tolist()
        timestamps = [last - timedelta(seconds=offset * 60) for offset in offsets]
        # Offsets grow with distance from `last`, so reverse to get chronological order.
        return timestamps[::-1]

    def _generate_users(self):
        """Generate list of users.

        Returns:
            numpy.ndarray: Array of length ``size`` with the name of the user sending each message.

        """
        return np.random.choice(self.users, self.size)

    def _generate_df(self, last_timestamp=None):
        """Generate random chat as DataFrame.

        Args:
            last_timestamp (datetime, optional): Datetime of last message. If ``None``, defaults to current date.

        Returns:
            pandas.DataFrame: DataFrame with random messages, indexed by date.

        """
        # Keep this call order: messages (emojis) and users consume the same global NumPy random stream.
        messages = self._generate_messages()
        timestamps = self._generate_timestamps(last=last_timestamp)
        users = self._generate_users()
        df = pd.DataFrame.from_dict({
            COLNAMES_DF.DATE: timestamps,
            COLNAMES_DF.USERNAME: users,
            COLNAMES_DF.MESSAGE: messages,
        })
        return df.set_index(COLNAMES_DF.DATE)

    def generate(self, filepath=None, hformat=None, last_timestamp=None):
        """Generate random chat as :func:`WhatsAppChat <whatstk.whatsapp.objects.WhatsAppChat>`.

        Args:
            filepath (str): If given, generated chat is saved with name ``filepath`` (must be a local path).
            hformat (str, optional): :ref:`Format of the header <The header format>`, e.g.
                                    ``'[%y-%m-%d %H:%M:%S] - %name:'``.
            last_timestamp (datetime, optional): Datetime of last message. If `None`, defaults to current date.

        Returns:
            WhatsAppChat: Chat with random messages.

        ..  seealso::

            * :func:`WhatsAppChat.to_txt <whatstk.whatsapp.objects.WhatsAppChat.to_txt>`

        """
        df = self._generate_df(last_timestamp=last_timestamp)
        chat = WhatsAppChat(df)
        if filepath:
            chat.to_txt(filepath=filepath, hformat=hformat)
        return chat