From 4ab567575bdb8fb8ce83568e827cc041b9763c0a Mon Sep 17 00:00:00 2001 From: Scot Hacker Date: Sat, 9 Mar 2019 11:24:20 -0800 Subject: [PATCH] More validations, break out validation function --- todo/data/import_example.csv | 9 +- todo/management/commands/import_csv.py | 3 +- todo/operations/csv_importer.py | 115 +++++++++++++++++++++---- 3 files changed, 104 insertions(+), 23 deletions(-) diff --git a/todo/data/import_example.csv b/todo/data/import_example.csv index 8762bb9..bc33c15 100644 --- a/todo/data/import_example.csv +++ b/todo/data/import_example.csv @@ -1,5 +1,6 @@ Title,Group,Task List,Created Date,Due Date,Completed,Created By,Assigned To,Note,Priority -Make dinner,Scuba Divers,Example List,2012-03-14,,No,shacker,user1,This is as good as it gets,3 -Bake bread,Scuba Divers,Example List,2012-03-14,2012-03-14,,shacker,,, -Eat food,Scuba Divers,Example List,,2015-06-24,Yes,,user2,Every generation throws a hero up the pop charts,77 -Be glad,Scuba Divers,Example List,2019-03-07,,,,user2,,1 \ No newline at end of file +Make dinner,Scuba Divers,Example List,2012-03-14,,No,shacker,shacker,This is as good as it gets,3 +Bake bread,Scuba Divers,Example List,2012-03-14,2012-03-14,,nonexistentusername,,, +Eat food,Coyotes,Example List,,2015-06-24,Yes,user3,user2,Every generation throws a hero up the pop charts,77 +Be glad,Scuba Divers,Example List,2019-03-07,,,user3,user2,,1 +Dog food,Scuba Divers,Example List,2019-03-07,,,,user2,,1 \ No newline at end of file diff --git a/todo/management/commands/import_csv.py b/todo/management/commands/import_csv.py index b27f2fc..f170350 100644 --- a/todo/management/commands/import_csv.py +++ b/todo/management/commands/import_csv.py @@ -27,4 +27,5 @@ class Command(BaseCommand): # Don't check validity of filepath here; upserter will do that. filepath = str(options.get("file")) - CSVImporter.upsert(filepath) + importer = CSVImporter() + importer.upsert(filepath) diff --git a/todo/operations/csv_importer.py b/todo/operations/csv_importer.py index 6112a0f..78916b5 100644 --- a/todo/operations/csv_importer.py +++ b/todo/operations/csv_importer.py @@ -1,39 +1,118 @@ import csv +import logging import sys from pathlib import Path from django.contrib.auth import get_user_model +from django.contrib.auth.models import Group from icecream import ic from todo.models import Task, TaskList +log = logging.getLogger(__name__) + class CSVImporter: - """Core upsert functionality for CSV import, for re-use by `import_csv` management command, web UI and tests.""" + """Core upsert functionality for CSV import, for re-use by `import_csv` management command, web UI and tests. + For each row processed, first we try and get the correct related objects or set default values, then decide + on our upsert logic - create or update? We must enforce internal rules during object creation and take a SAFE + approache - for example + we shouldn't add a task if it specifies that a user is not a specified group. For that reason, it also doesn't + make sense to create new groups from here. In other words, the ingested CSV must accurately represent the current + database. Non-conforming rows are skipped and logged. Unlike manual task creation, we won't assume that the person + running this ingestion is the task creator - the creator must be specified, and a blank cell is an error. We also + do not create new lists - they must already exist. + + Supplies a detailed log of what was and was not imported at the end.""" def __init__(self): - pass + self.errors = [] + self.line_count = 0 - def upsert(filepath): + def upsert(self, filepath): if not Path(filepath).exists(): print(f"Sorry, couldn't find file: {filepath}") sys.exit(1) - # Have arg and good file path, read rows with open(filepath, mode="r") as csv_file: - csv_reader = csv.DictReader(csv_file) - line_count = 0 - for row in csv_reader: - # Title, Group, Task List, Created Date, Due Date, Completed, Created By, Assigned To, Note, Priority - newrow = row # Copy so we can modify properties - newrow["Completed"] = True if row.get("Completed") == "Yes" else False - ic(newrow) + # Have arg and good file path -- read rows + # Inbound columns: + # Title, Group, Task List, Created Date, Due Date, Completed, Created By, Assigned To, Note, Priority - if line_count == 0: - print(f'Column names are {", ".join(row)}') - line_count += 1 - print( - f"Row {line_count}: Title: {newrow['Title']}, Group: {newrow['Group']}, Completed: {newrow['Completed']}" - ) - line_count += 1 + csv_reader = csv.DictReader(csv_file) + for row in csv_reader: + self.line_count += 1 + + newrow = self.validate_row(row) # Copy so we can modify properties + if newrow: + ic(newrow) + print("\n") + + # Report + for msg in self.errors: + print(msg) + + print(f"\nProcessed {self.line_count} rows") + print(f"Inserted xxx rows") + + def validate_row(self, row): + """Perform data integrity checks and set default values. Returns a valid object for insertion, or False. + Errors are stored for later display.""" + + # Task creator must exist + if not row.get("Created By"): + msg = f"Skipped row {self.line_count}: Missing required task creator." + self.errors.append(msg) + return False + + created_by = get_user_model().objects.filter(username=row.get("Created By")) + if created_by.exists(): + creator = created_by.first() + else: + msg = f"Skipped row {self.line_count}: Invalid task creator {row.get('Created By')}" + self.errors.append(msg) + return False + + # If specified, Assignee must exist + if row.get("Assigned To"): + assigned = get_user_model().objects.filter(username=row.get("Assigned To")) + if assigned.exists(): + assignee = assigned.first() + else: + msg = f"Skipped row {self.line_count}: Missing or invalid task assignee {row.get('Assigned To')}" + self.errors.append(msg) + return False + else: + assignee = None # Perfectly valid + + # Group must exist + try: + target_group = Group.objects.get(name=row.get("Group")) + except Group.DoesNotExist: + msg = f"Skipped row {self.line_count}: Could not find group {row.get('Group')}." + self.errors.append(msg) + return False + + # Task creator must be in the target group + if target_group not in creator.groups.all(): + msg = f"Skipped row {self.line_count}: {creator} is not in group {target_group}" + self.errors.append(msg) + return False + + # Assignee must be in the target group + if assignee and target_group not in assignee.groups.all(): + msg = f"Skipped row {self.line_count}: {assignee} is not in group {target_group}" + self.errors.append(msg) + return False + + # Group membership checks have passed + row["Created By"] = creator + row["Group"] = target_group + if assignee: + row["Assigned To"] = assignee + + # Set Completed default + row["Completed"] = True if row.get("Completed") == "Yes" else False + + return row