Source code for SSD.core.database

from typing import Union, List, Type, Dict, Tuple, Optional, Any, Callable
from os import remove, mkdir
from os.path import exists, join, sep, getsize
from inspect import getmembers
from peewee import ForeignKeyField
from playhouse.migrate import SqliteDatabase
from playhouse.signals import Signal, pre_save, post_save
from datetime import datetime
from numpy import unique

from SSD.core.adaptive_table import AdaptiveTable, StoringTable, ExchangeTable
from SSD.core.peewee_extension import generate_models
from SSD.core.exporter import Exporter

# Description of a single Field given to the Database: (name, type), (name, type, default value),
# or (name, name of the Table referenced by a ForeignKey).
FieldType = Union[Tuple[str, Type], Tuple[str, Type, Any], Tuple[str, str]]


[docs] class Database:
[docs] def __init__(self, database_dir: str = '', database_name: str = 'database'): """ Manage the creation and loading of Tables in the Database. User interface to dynamically add, get and update entries. :param database_dir: Directory which contains the Database file. :param database_name: Name of the Database file. """ # Eventually remove extension from the database name database_name = database_name if len(database_name.split('.')) == 1 else database_name.split('.')[0] self.__database_dir = database_dir self.__database_name = database_name self.__database: Optional[SqliteDatabase] = None self.__tables: Dict[str, type(AdaptiveTable)] = {} self.__fk: Dict[str, Dict[str, str]] = {} self.__signals: List[Tuple[str, Signal, str, Callable, str]] = []
[docs] @staticmethod def make_name(table_name: str) -> str: """ Harmonize the Table names. :param table_name: Name of the Table. """ return table_name[0].upper() + table_name[1:].lower() if len(table_name) > 1 else table_name.upper()
def new(self, remove_existing: bool = False) -> 'Database':
    """
    Create a new Database file.

    :param remove_existing: If True, an existing Database file with the same name will be overwritten.
                            Otherwise, an index is appended to the Database name ('name(1)', 'name(2)', ...).
    :return: The Database itself.
    """

    # Imported locally so that the module-level imports do not need to change
    from os import makedirs

    # Create the directory (including missing parent directories) if it does not exist
    if self.__database_dir != '':
        makedirs(self.__database_dir, exist_ok=True)

    # Check for existing similar files
    database_path = join(self.__database_dir, f'{self.__database_name}.db')
    if exists(database_path):

        # Option 1: Overwriting file
        if remove_existing:
            remove(database_path)

        # Option 2: Indexing file name until an unused name is found
        else:
            index = 1
            while exists(database_path := join(self.__database_dir, f'{self.__database_name}({index}).db')):
                index += 1
            self.__database_name = f'{self.__database_name}({index})'

    # Create the Database
    self.__database = SqliteDatabase(database_path)
    return self
def load(self, show_architecture: bool = False) -> 'Database':
    """
    Load an existing Database file.

    :param show_architecture: If True, the loaded models will be printed.
    :return: The Database itself.
    """

    # Check file existence
    database_path = join(self.__database_dir, f'{self.__database_name}.db')
    if not exists(database_path):
        raise ValueError(f"WARNING: the following Database does not exist ({database_path}).")

    # Load the Database
    self.__database = SqliteDatabase(database_path)
    models, database_descr = generate_models(self.__database)

    for stored_name, model in models.items():
        # Loading removes the '_' symbol in desc.model_names: the loaded name is cut again into chunks
        # that have the same lengths as the '_'-separated parts of the stored name
        remaining = database_descr.model_names[stored_name]
        chunks = []
        for part in stored_name.split('_'):
            chunks.append(remaining[:len(part)])
            remaining = remaining[len(part):]

        # Register name
        registered_name = self.make_name('_'.join(chunks))
        self.__tables[registered_name] = model
        model._meta.name = registered_name

    # Register FK
    for registered_name, table in self.__tables.items():
        self.__fk[registered_name] = {field_name: field.rel_model._meta.name
                                      for field_name, field in table.fields(only_names=False).items()
                                      if type(field) == ForeignKeyField}

    # Show resulting architecture
    if show_architecture:
        self.print_architecture()
    return self
def get_path(self) -> Tuple[str, str]:
    """
    Access the Database file path.

    :return: Directory of the Database file and name of the Database (without the '.db' extension).
    """

    directory, name = self.__database_dir, self.__database_name
    return directory, name
def print_architecture(self):
    """
    Print the content of the Database with Table(s) and their Field(s).
    """

    # Header with the Database file name
    print(f'\nDATABASE {self.__database_name}.db')

    # One description per registered Table, concatenated without separator
    descriptions = []
    for name, table in self.__tables.items():
        descriptions.append(table.description(indent=True, name=name))
    print(''.join(descriptions))
def get_architecture(self):
    """
    Get the content of the Database with Table(s) and their Field(s).

    :return: Dict containing, for each Table name, the list of its Field descriptions.
    """

    architecture = {}
    for table_name, table in self.__tables.items():
        # The first chunk of the description is the Table header, the following ones are the Fields
        _header, *field_chunks = table.description().split(' - ')
        # The last character of each Field chunk is dropped
        architecture[table_name] = [chunk[:-1] for chunk in field_chunks]
    return architecture
def get_tables(self, only_names: bool = True):
    """
    Get the names of created Tables in the Database.

    :param only_names: If True, only the names of the Tables will be returned in a List, otherwise the Tables
                       themselves are returned in a Dict.
    """

    return list(self.__tables.keys()) if only_names else self.__tables
def get_fields(self, table_name: str, only_names: bool = True):
    """
    Get the Field(s) of a Table of the Database.

    :param table_name: Name of the Table.
    :param only_names: Forwarded to the 'fields' method of the Table: if True, only the names of the Fields are
                       requested.
    """

    harmonized_name = self.make_name(table_name)

    # Known Table: delegate to the Table itself
    if harmonized_name in self.__tables:
        table = self.__tables[harmonized_name]
        return table.fields(only_names=only_names)

    raise ValueError(f"Unknown table with name {harmonized_name}")
def create_table(self,
                 table_name: str,
                 storing_table: bool = True,
                 fields: Optional[Union[FieldType, List[FieldType]]] = None):
    """
    Add a new Table to the Database with customizable Fields.

    :param table_name: Name of the Table to add to the Database.
    :param storing_table: Specify whether the Table must be a storing or an exchange Table.
    :param fields: Name(s), type(s) and default value(s) of the Field(s) to add to the Table.
    """

    # The Table is registered under its harmonized name
    self.__create(table_name=self.make_name(table_name),
                  existing_table=False,
                  storing_table=storing_table,
                  fields=fields)
def create_fields(self,
                  table_name: str,
                  fields: Union[FieldType, List[FieldType]]):
    """
    Add new Fields to a Table.

    :param table_name: Name of the Table on which to add the new Fields.
    :param fields: Name(s), type(s) and default value(s) of the Field(s) to add to the Table.
    """

    # The Table is looked up under its harmonized name and is not created again
    self.__create(table_name=self.make_name(table_name),
                  existing_table=True,
                  fields=fields)
def __create(self,
             table_name: str,
             existing_table: bool,
             storing_table: bool = True,
             fields: Optional[Union[FieldType, List[FieldType]]] = None):
    """
    Create a Table if required, then extend it with new Fields.

    :param table_name: Harmonized name of the Table.
    :param existing_table: If False, the Table is created first.
    :param storing_table: Specify whether a created Table must be a storing or an exchange Table.
    :param fields: Name(s), type(s) and default value(s) of the Field(s) to add to the Table.
    """

    # Create the table
    if not existing_table:
        self.__new_table(table_name=table_name, storing_table=storing_table)

    # Extend the fields: a single Field description is wrapped in a list
    if fields is not None and type(fields) != list:
        fields = [fields]
    self.__new_fields(table_name=table_name, fields=fields)

def __new_table(self,
                table_name: str,
                storing_table: bool):
    """
    Register and connect a new Table. Nothing is done if a Table with this name is already registered.

    :param table_name: Harmonized name of the Table.
    :param storing_table: Specify whether the Table must be a storing or an exchange Table.
    """

    if table_name in self.__tables:
        return

    # Create the new Table as a dedicated class derived from the storing / exchange template
    table_class = StoringTable if storing_table else ExchangeTable
    table = type(table_name, (table_class,), dict(table_class.__dict__))
    table._meta.name = table_name
    self.__tables[table_name] = table
    self.__fk[table_name] = {}

    # Connect the Table to the Database
    table.connect(self.__database)

    # Add a DateTimeField to exchange tables
    if not storing_table:
        self.__new_fields(table_name=table_name, fields=[('_dt_', datetime)])

def __new_fields(self,
                 table_name: str,
                 fields: List[FieldType]):
    """
    Add Fields to a registered Table.

    :param table_name: Harmonized name of the Table.
    :param fields: Descriptions of the Fields: (name, type), (name, type, default) or (name, referenced Table name).
    :raises ValueError: If a Field name clashes with an existing attribute of the Table, or if a ForeignKey
                        references an unknown Table.
    """

    if fields is None:
        return
    table = self.__tables[table_name]

    # Add each Field to the Table
    for field in fields:

        # Define name, type and default value
        field_name, field_type = field[0], field[1]
        field_default = '_null_' if len(field) == 2 else field[2]

        # As peewee.Model creates a new attribute named field_name, check that this attribute does not exist
        if field_name in [member[0] for member in getmembers(table)]:
            raise ValueError(f"Tried to create a field '{field_name}' in the Table '{table_name}'. "
                             f"You are not allowed to create a field with this name, please rename it.")

        # Extend the Table
        if field_name not in table.fields():

            # FK: a type given as a string is the name of the referenced Table
            if type(field_type) == str:
                fk_table_name = self.make_name(field_type)
                if fk_table_name not in self.__tables:
                    raise ValueError(f"Cannot create the ForeignKey '{fk_table_name}' since this Table does not "
                                     f"exist.\nCreated Tables so far: {list(self.__tables)}")
                table.extend_fk(self.__tables[fk_table_name], field_name)
                self.__fk[table_name][field_name] = fk_table_name

            # Regular Field
            else:
                table.extend(field_name, field_type, field_default)
def register_pre_save_signal(self,
                             table_name: str,
                             handler: Callable,
                             name: Optional[str] = None):
    """
    Register a pre_save signal from a Table to a handler. The connection itself is done by 'connect_signals'.

    :param table_name: Name of the Table that will be sender.
    :param handler: Executable code, called with the Table name and the data of the saved line.
    :param name: Name of the signal.
    """

    sender_name = self.make_name(table_name)
    wrapped_handler = self.__on_save_signal(handler)
    self.__signals.append(('pre_save', pre_save, sender_name, wrapped_handler, name))
def register_post_save_signal(self,
                              table_name: str,
                              handler: Callable,
                              name: Optional[str] = None):
    """
    Register a post_save signal from a Table to a handler. The connection itself is done by 'connect_signals'.

    :param table_name: Name of the Table that will be sender.
    :param handler: Executable code, called with the Table name and the data of the saved line.
    :param name: Name of the signal.
    """

    sender_name = self.make_name(table_name)
    wrapped_handler = self.__on_save_signal(handler)
    self.__signals.append(('post_save', post_save, sender_name, wrapped_handler, name))
@staticmethod
def __on_save_signal(handler: Callable):
    """
    Wrap a user handler into a receiver with the signature (sender, instance, **kwargs).

    :param handler: Executable code taking the Table name and the data of the saved line.
    """

    def signal_handler(sender, instance, **kwargs):
        # Convert received information into Table name and data
        handler(sender.get_name(), instance.__data__)

    return signal_handler
def connect_signals(self):
    """
    Connect the registered signals between Tables and handlers, then clear the list of registered signals.
    """

    # Unpack the information of each registered signal
    for signal_type, signal_class, table_name, handler, name in self.__signals:

        # Connect only if the Table has been created
        if table_name in self.__tables:
            signal_class.connect(receiver=handler, sender=self.__tables[table_name], name=name)
            continue

        print(f"WARNING: Signal '{signal_type}' was not connected with Table '{table_name}' as sender since "
              f"it was not created.")

    self.__signals = []
def add_data(self,
             table_name: str,
             data: Dict[str, Any]):
    """
    Execute a line insert query. Return the index of the new line in the Table.

    :param table_name: Name of the Table.
    :param data: New line of the Table.
    """

    harmonized_name = self.make_name(table_name)
    return self.__add_data(table_name=harmonized_name, data=data)
def add_batch(self,
              table_name: str,
              batch: Dict[str, List[Any]]):
    """
    Execute a batch insert query. Return the indices of the new lines in the Table.

    :param table_name: Name of the Table.
    :param batch: New lines of the Table, as one list of values per Field.
    :raises ValueError: If the Fields of the batch do not all contain the same number of samples.
    """

    table_name = self.make_name(table_name)

    # Check that the batch is well-formed. ForeignKey Fields are left out of the check; a Table that is
    # not created yet has no known ForeignKey.
    fk_fields = self.__fk.get(table_name, {})
    samples = {field_name: len(values) for field_name, values in batch.items() if field_name not in fk_fields}
    if len(set(samples.values())) > 1:
        raise ValueError(f"The number of samples per batch must be the same for all fields. Number of samples "
                         f"received per field: {samples}")

    return self.__add_data(table_name=table_name, data=batch, batched=True)
def __add_data(self,
               table_name: str,
               data: Union[Dict[str, Any], Dict[str, List[Any]]],
               batched: Optional[bool] = False):
    """
    Insert one line or one batch of lines in a Table, creating the Table and its Fields on the fly if possible.

    :param table_name: Harmonized name of the Table.
    :param data: New line (one value per Field) or new lines (one list of values per Field) of the Table.
    :param batched: If True, 'data' is a batch of lines.
    :return: Result of the 'add_data' method of the Table.
    :raises ValueError: If some Fields are not defined while the Table already contains lines.
    """

    # Unpack data
    fields_names = list(data.keys())
    fields_values = list(data.values())

    # Deduce the Field types: referenced Table name for a known FK, type of the first sample for a batch,
    # type of the value otherwise
    known_fk = self.__fk.get(table_name, {})
    fields_types = []
    for name, value in zip(fields_names, fields_values):
        if name in known_fk:
            fields_types.append(known_fk[name])
        elif batched:
            fields_types.append(type(value[0]))
        else:
            fields_types.append(type(value))

    # Check table existence
    if table_name not in self.__tables:
        self.create_table(table_name=table_name, fields=list(zip(fields_names, fields_types)))
    table = self.__tables[table_name]

    # Check fields existence
    undefined_fields = set(fields_names) - set(table.fields())
    if len(undefined_fields) > 0:

        # Empty table: add fields on the fly. Only the undefined Fields are submitted, since a Field that
        # already exists would be rejected as a name clash with an attribute of the Table.
        if len(table.select()) == 0:
            missing_fields = [(name, field_type) for name, field_type in zip(fields_names, fields_types)
                              if name in undefined_fields]
            self.create_fields(table_name=table_name, fields=missing_fields)

        # Non-empty table
        else:
            raise ValueError(f"[{self.__class__.__name__}] Some fields where not defined in table {table}."
                             f" As table {table} is non-empty, please define first the following fields :"
                             f" {list(undefined_fields)}.")

    # Check FK data: nested data is first inserted in the referenced Table, then replaced by the result
    fk_fields = set(fields_names).intersection(set(self.__fk[table_name].keys()))
    for fk_field in fk_fields:
        idx = fields_names.index(fk_field)
        if type(fields_values[idx]) == dict:
            fk_table_name = self.__fk[table_name][fk_field]
            fields_values[idx] = self.__add_data(table_name=fk_table_name, data=fields_values[idx],
                                                 batched=batched)

    # Add the data to Table
    return table.add_data(fields_names=fields_names, fields_values=fields_values, batched=batched)
def update(self,
           table_name: str,
           data: Dict[str, Any],
           line_id: int = -1):
    """
    Update a line of a Table.

    :param table_name: Name of the Table on which to perform the query.
    :param data: Updated data of the line. The value of a ForeignKey Field can be a Dict, which is then used to
                 update the referenced line.
    :param line_id: Index of the line to update. Negative indices are counted from the last line.
    :raises ValueError: If the Table is unknown or if some Fields are not defined in the Table.
    """

    # Check table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown table with name {table_name}")
    table = self.__tables[table_name]

    # Unpack data
    fields_names = list(data.keys())
    fields_values = list(data.values())

    # Define the line index
    nb_line = self.nb_lines(table_name=table_name)
    if line_id < 0:
        line_id += nb_line + 1
    elif line_id > nb_line:
        line_id = nb_line

    # Check fields existence
    undefined_fields = set(fields_names) - set(table.fields())
    if len(undefined_fields) > 0:
        raise ValueError(f"[{self.__class__.__name__}] Some fields where not defined in table {table}."
                         f" As table {table} is non-empty, please define first the following fields :"
                         f" {list(undefined_fields)}.")

    # Check FK data: nested data updates the referenced line and is removed from the current query
    fk_fields = set(fields_names).intersection(set(self.__fk[table_name].keys()))
    for fk_field in fk_fields:
        idx = fields_names.index(fk_field)
        if type(fields_values[idx]) == dict:
            fk_table_name = self.__fk[table_name][fk_field]
            fk_id = self.get_line(table_name=table_name, fields=fk_field, line_id=line_id)[fk_field]
            self.update(table_name=fk_table_name, data=fields_values[idx], line_id=fk_id)
            del fields_names[idx]
            del fields_values[idx]

    # Update query, skipped when nothing is left to update in this Table (an UPDATE without any
    # assignment is not a valid query)
    if fields_names:
        table.update(dict(zip(fields_names, fields_values))).where(table.id == line_id).execute()
def get_line(self,
             table_name: str,
             fields: Optional[Union[str, List[str]]] = None,
             line_id: int = -1,
             joins: Optional[Union[str, List[str]]] = None):
    """
    Get a line of a Table.

    :param table_name: Name of the Table on which to perform the query.
    :param fields: Name(s) of the Field(s) to request. If not specified, all the Fields are selected.
    :param line_id: Index of the line to get. Negative indices are counted from the last line.
    :param joins: Name(s) of Table(s) to join to the selection.
    :return: Dict containing the values of the selected Fields, where joined ForeignKeys are replaced by the
             referenced line.
    """

    # Check the Table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown table with name {table_name}")
    table = self.__tables[table_name]

    def fk_field_of(joined_table):
        # Name of the first ForeignKey Field that references the joined Table
        links = self.__fk[table_name]
        return list(links.keys())[list(links.values()).index(joined_table)]

    # A single name is wrapped in a list
    if type(fields) == str:
        fields = [fields]
    if type(joins) == str:
        joins = [joins]

    # Define the fields to select (an empty selection selects every Field)
    fields_selection = ()
    if fields is not None:
        fields_selection += (table.id,)
        for field in fields:
            if field in table.fields():
                fields_selection += (table.fields(only_names=False)[field],)
        # The ForeignKey Fields required by the joins are selected as well
        if joins is not None:
            for joined_table in joins:
                if joined_table in self.__fk[table_name].values() and joined_table not in fields:
                    fields_selection += (table.fields(only_names=False)[fk_field_of(joined_table)],)

    # Define the index of the line to select: negative indices start from the end, too large ones are clamped
    nb_line = self.nb_lines(table_name=table_name)
    line_id = line_id + nb_line + 1 if line_id < 0 else min(line_id, nb_line)

    # Selection query
    data = table.select(*fields_selection).where(table.id == line_id).dicts()[0]

    # Join: replace each joined ForeignKey by the line it references
    if joins is not None:
        for joined_table in joins:
            if joined_table not in self.__fk[table_name].values():
                continue
            field_name = fk_field_of(joined_table)
            if field_name in data:
                data[field_name] = self.get_line(table_name=joined_table, fields=fields,
                                                 line_id=data[field_name], joins=joined_table)

    return data
def get_lines(self,
              table_name: str,
              fields: Optional[Union[str, List[str]]] = None,
              lines_id: Optional[List[int]] = None,
              lines_range: Optional[List[int]] = None,
              joins: Optional[Union[str, List[str]]] = None,
              batched: bool = False):
    """
    Get a set of lines of a Table.

    :param table_name: Name of the Table on which to perform the query.
    :param fields: Name(s) of the Field(s) to select. If not specified, all the Fields are selected.
    :param lines_id: Indices of the lines to get. If not specified, 'lines_range' value will be used.
    :param lines_range: Range of indices of the lines to get. If not specified, all lines will be selected.
    :param joins: Name(s) of Table(s) to join to the selection.
    :param batched: If True, data is returned as one batch per field. Otherwise, data is returned as list of
                    lines.
    :return: Dict of lists if batched, list of Dicts otherwise. An empty selection gives an empty Dict or an
             empty list.
    :raises ValueError: If the Table is unknown or if 'lines_range' does not contain exactly two indices.
    """

    # Check table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown table with name {table_name}")
    table = self.__tables[table_name]

    def fk_field_of(joined_table):
        # Name of the first ForeignKey Field that references the joined Table
        links = self.__fk[table_name]
        return list(links.keys())[list(links.values()).index(joined_table)]

    # A single name is wrapped in a list
    if type(fields) == str:
        fields = [fields]
    if type(joins) == str:
        joins = [joins]

    # Define the fields to select (an empty selection selects every Field)
    fields_selection = ()
    if fields is not None:
        fields_selection += (table.id,)
        for field in fields:
            if field in table.fields():
                fields_selection += (table.fields(only_names=False)[field],)
        # The ForeignKey Fields required by the joins are selected as well
        if joins is not None:
            for joined_table in joins:
                if joined_table in self.__fk[table_name].values() and joined_table not in fields:
                    fields_selection += (table.fields(only_names=False)[fk_field_of(joined_table)],)

    # Define the indices of lines to select
    if lines_id is None:
        if lines_range is not None and len(lines_range) != 2:
            raise ValueError("The range of lines must contains the first and the last line indices.")
        nb_line = self.nb_lines(table_name=table_name)
        first_line_id = lines_range[0] if lines_range is not None else 1
        last_line_id = lines_range[1] if lines_range is not None else nb_line
        _slice = [first_line_id, last_line_id]
        # Negative indices start from the end, too large ones are clamped to the last line
        for i, idx in enumerate(_slice):
            if idx < 0:
                _slice[i] += nb_line + 1
            elif idx > nb_line:
                _slice[i] = nb_line
        # The last line is included; a reversed range is reduced to its first line
        _slice[1] = _slice[0] + 1 if _slice[1] < _slice[0] else _slice[1] + 1
        lines_id = range(*_slice)

    # Selection query
    rows = list(table.select(*fields_selection).where(table.id << lines_id).dicts())

    # Nothing selected: there is no line to batch nor to join
    if not rows:
        return {} if batched else []

    # Return the lines as batch or as list of lines
    lines: Union[Dict[str, List[Any]], List[Dict[str, Any]]]
    if batched:
        lines = {key: [row[key] for row in rows] for key in rows[0]}
    else:
        lines = rows

    # Join
    if joins is not None:
        for joined_table in joins:
            if joined_table not in self.__fk[table_name].values():
                continue
            field_name = fk_field_of(joined_table)
            dict_keys = lines.keys() if batched else lines[0].keys()
            if field_name in dict_keys:
                fk_ids = lines[field_name] if batched else [line[field_name] for line in lines]
                # NOTE(review): the joined lines are matched to the current lines by position, which assumes
                # that the query returns exactly one line per requested index, in the same order - confirm
                # for repeated or unordered ForeignKey values.
                data = self.get_lines(table_name=joined_table, fields=fields, lines_id=fk_ids,
                                      joins=joins, batched=batched)
                if batched:
                    lines[field_name] = data
                else:
                    for i, joined_line in enumerate(data):
                        lines[i][field_name] = joined_line

    return lines
def nb_lines(self, table_name: str):
    """
    Return the number of entries on a Table.

    :param table_name: Name of the Table.
    """

    harmonized_name = self.make_name(table_name)

    # Known Table: count the entries of a selection of the whole Table
    if harmonized_name in self.__tables:
        table = self.__tables[harmonized_name]
        return table.select().count()

    raise ValueError(f"Unknown table with name {harmonized_name}")
@property
def memory_size(self):
    """
    Return the Database file memory size in bytes.
    """

    database_file = f'{self.__database_name}.db'
    database_path = join(self.__database_dir, database_file)
    return getsize(database_path)
def close(self, erase_file: bool = False):
    """
    Close the Database.

    :param erase_file: If True, the Database file will be erased.
    """

    # Nothing to close if the Database was neither created with new() nor loaded with load()
    if self.__database is not None:
        self.__database.close()

    # Erase the file if required
    database_path = join(self.__database_dir, f'{self.__database_name}.db')
    if erase_file and exists(database_path):
        remove(database_path)
def rename_table(self,
                 table_name: str,
                 new_table_name: str):
    """
    Rename a Table of the Database.

    :param table_name: Current name of the Table to rename.
    :param new_table_name: New name of the Table.
    :raises ValueError: If the Table is unknown or if the new name is already used by another Table.
    """

    # Check the Table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown table with name {table_name}")

    # The new name is harmonized like any other Table name, otherwise the Table could not be found again
    new_table_name = self.make_name(new_table_name)
    if new_table_name != table_name and new_table_name in self.__tables:
        raise ValueError(f"A Table with name '{new_table_name}' already exists")

    # Renaming, done before touching the registries so that a failure leaves them unchanged
    table = self.__tables[table_name]
    table.rename_table(table_name, new_table_name)
    del self.__tables[table_name]
    self.__tables[new_table_name] = table

    # Keep the ForeignKey registry consistent: entry of the Table itself, then references to it
    self.__fk[new_table_name] = self.__fk.pop(table_name, {})
    for links in self.__fk.values():
        for field_name, referenced_table in links.items():
            if referenced_table == table_name:
                links[field_name] = new_table_name
def rename_field(self,
                 table_name: str,
                 field_name: str,
                 new_field_name: str):
    """
    Rename a Field of a Table of the Database.

    :param table_name: Name of the Table.
    :param field_name: Current name of the Field to rename.
    :param new_field_name: New name of the Field.
    :raises ValueError: If the Table or the Field is unknown.
    """

    # Check the Table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown Table with name '{table_name}'")
    table = self.__tables[table_name]

    # Check the field existence
    if field_name not in table.fields():
        raise ValueError(f"Unknown Field with name '{field_name}' for Table '{table_name}'")

    # Renaming
    table.rename_field(field_name, new_field_name)

    # Keep the ForeignKey registry consistent if the renamed Field is a ForeignKey
    table_fk = self.__fk.get(table_name, {})
    if field_name in table_fk:
        table_fk[new_field_name] = table_fk.pop(field_name)
def remove_table(self, table_name: str):
    """
    Remove a Table from the Database.

    :param table_name: Name of the Table.
    :raises ValueError: If the Table is unknown.
    """

    # Check the table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown Table with name '{table_name}'")

    # Remove the Table ('drop_tables' expects a list of models)
    self.__database.drop_tables([self.__tables[table_name]])
    del self.__tables[table_name]

    # Forget the ForeignKeys of the removed Table so that a Table created later with the same name starts clean
    self.__fk.pop(table_name, None)
def remove_field(self,
                 table_name: str,
                 field_name: str):
    """
    Remove a Field of a Table of the Database.

    :param table_name: Name of the Table.
    :param field_name: Current name of the Field to remove.
    :raises ValueError: If the Table or the Field is unknown.
    """

    # Check the Table existence
    table_name = self.make_name(table_name)
    if table_name not in self.__tables:
        raise ValueError(f"Unknown Table with name '{table_name}'")
    table = self.__tables[table_name]

    # Check the field existence
    if field_name not in table.fields():
        raise ValueError(f"Unknown Field with name '{field_name}' for Table '{table_name}'")

    # Removing
    table.remove_field(field_name)

    # Keep the ForeignKey registry consistent if the removed Field was a ForeignKey
    self.__fk.get(table_name, {}).pop(field_name, None)
def export(self,
           exporter: str,
           filename: str,
           tables: Optional[Union[str, List[str]]] = None) -> None:
    """
    Export the Database to a CSV or JSON file. One file is written per Table, named
    '<filename>_<table>.<exporter>'.

    :param exporter: Exporter type ('json' or 'csv').
    :param filename: Exported filename. An eventual extension is ignored.
    :param tables: Tables to export. If not specified, all the Tables are exported.
    :raises ValueError: If the exporter type is unknown or if a Table does not exist.
    """

    # Imported locally so that the module-level imports do not need to change
    from os.path import basename, dirname

    # Check exporter format
    exporter = exporter.lower()
    if exporter not in ['json', 'csv']:
        raise ValueError(f"Unknown exporter with name {exporter}. Available exporters are ['json', 'csv'].")

    # Set good file extension: drop the given one, keep the whole directory part of the path
    filename = join(dirname(filename), basename(filename).split('.')[0])

    # Get the tables to export, with harmonized names as everywhere else in the Database
    existing_tables = self.get_tables()
    tables = existing_tables if tables is None else tables
    tables = [tables] if type(tables) != list else tables
    tables = [self.make_name(table) for table in tables]
    for table in tables:
        if table not in existing_tables:
            raise ValueError(f"The following Table does not exist: {table}")

    # Export each table
    # Todo: see 'at once' version
    for table in tables:
        _filename = filename + f'_{table}.{exporter}'
        if exporter == 'json':
            query = self.get_lines(table_name=table, batched=True)
            Exporter.export_json(filename=_filename, query=query)
        else:
            query = self.__tables[table].select().tuples()
            Exporter.export_csv(filename=_filename, query=query)