Source code for whatstk.whatsapp.objects

"""Library WhatsApp objects."""


from whatstk._chat import BaseChat
from whatstk.utils.chat_merge import merge_chats
from whatstk.whatsapp.parser import df_from_txt_whatsapp


[docs]class WhatsAppChat(BaseChat): """Load and process a WhatsApp chat file. Args: df (pandas.DataFrame): Chat. Example: This simple example loads a chat using :func:`WhatsAppChat <WhatsAppChat>`. Once loaded, we can access its attribute :func:`df <WhatsAppChat.df>`, which contains the loaded chat as a DataFrame. .. code-block:: python >>> from whatstk.whatsapp.objects import WhatsAppChat >>> from whatstk.data import whatsapp_urls >>> chat = WhatsAppChat.from_source(filepath=whatsapp_urls.POKEMON) >>> chat.df.head(5) username message date 2016-08-06 13:23:00 Ash Ketchum Hey guys! 2016-08-06 13:25:00 Brock Hey Ash, good to have a common group! 2016-08-06 13:30:00 Misty Hey guys! Long time haven't heard anything fro... 2016-08-06 13:45:00 Ash Ketchum Indeed. I think having a whatsapp group nowada... 2016-08-06 14:30:00 Misty Definetly """ def __init__(self, df): """Constructor. Args: df (pandas.DataFrame): Chat. """ super().__init__(df, platform='whatsapp')
[docs] @classmethod def from_source(cls, filepath, **kwargs): """Create an instance from a chat text file. Args: filepath (str): Path to the file. It can be a local file (e.g. 'path/to/file.txt') or an URL to a hosted file (e.g. 'http://www.url.to/file.txt') **kwargs: Refer to the docs from :func:`df_from_txt_whatsapp <whatstk.whatsapp.parser.df_from_txt_whatsapp>` for details on additional arguments. Returns: WhatsAppChat: Class instance with loaded and parsed chat. .. seealso:: * :func:`df_from_txt_whatsapp <whatstk.whatsapp.parser.df_from_txt_whatsapp>` * :func:`WhatsAppChat.from_sources <whatstk.WhatsAppChat.from_sources>` """ # Prepare DataFrame df = df_from_txt_whatsapp(filepath=filepath, **kwargs) return cls(df)
[docs] @classmethod def from_sources(cls, filepaths, auto_header=None, hformat=None, encoding='utf-8'): """Load a WhatsAppChat instance from multiple sources. Args: filepaths (list): List with filepaths. auto_header (bool, optional): Detect header automatically (applies to all files). If None, attempts to perform automatic header detection for all files. If False, ``hformat`` is required. hformat (list, optional): List with the :ref:`header format <The header format>` to be used for each file. The list must be of length equal to ``len(filenames)``. A valid header format might be '[%y-%m-%d %H:%M:%S] - %name:'. encoding (str): Encoding to use for UTF when reading/writing (ex. ‘utf-8’). `List of Python standard encodings <https://docs.python.org/3/library/codecs.html#standard-encodings>`_. Returns: WhatsAppChat: Class instance with loaded and parsed chat. .. seealso:: * :func:`WhatsAppChat.from_source <WhatsAppChat.from_source>` * :func:`merge_chats <whatstk.utils.chat_merge.merge_chats>` Example: Load a chat using two text files. In this example, we use sample chats (available online, see urls in source code :mod:`whatstk.data <whatstk.data>`). .. code-block:: python >>> from whatstk.whatsapp.objects import WhatsAppChat >>> from whatstk.data import whatsapp_urls >>> filepath_1 = whatsapp_urls.LOREM1 >>> filepath_2 = whatsapp_urls.LOREM2 >>> chat = WhatsAppChat.from_sources(filepaths=[filepath_1, filepath_2]) >>> chat.df.head(5) username message date 2019-10-20 10:16:00 John Laborum sed excepteur id eu cillum sunt ut. 2019-10-20 11:15:00 Mary Ad aliquip reprehenderit proident est irure mo... 2019-10-20 12:16:00 +1 123 456 789 Nostrud adipiscing ex enim reprehenderit minim... 2019-10-20 12:57:00 +1 123 456 789 Deserunt proident laborum exercitation ex temp... 2019-10-20 17:28:00 John Do ex dolor consequat tempor et ex. """ dfs = [] if auto_header is None or auto_header: auto_header = [True]*len(filepaths) else: auto_header = [False]*len(filepaths) if hformat is None: hformat = [None]*len(filepaths) for filepath, ah, hf in zip(filepaths, auto_header, hformat): chat = WhatsAppChat.from_source(filepath, auto_header=ah, hformat=hf, encoding=encoding) dfs.append(chat.df) df = merge_chats(dfs) return cls(df)
[docs] def to_txt(self, filepath, hformat=None): """Export chat to a text file. Usefull to export the chat to different formats (i.e. using different hformats). Args: filepath (str): Name of the file to export (must be a local path). hformat (str, optional): Header format. Defaults to '%y-%m-%d, %H:%M - %name:'. """ if not filepath.endswith('.txt'): raise ValueError("filepath must end with .txt") if not hformat: hformat = "%y-%m-%d, %H:%M - %name:" lines = [] raw_lines = self.df.reset_index().values.tolist() for line in raw_lines: date, user, text = line hformat = hformat.replace('%name', '{name}') header = date.strftime(hformat).format(name=user) formatted_line = '{} {}'.format(header, text) lines.append(formatted_line) text = '\n'.join(lines) with open(filepath, 'w') as f: f.write(text)