From 365435e8395c576741d98673ccba21a5c222754b Mon Sep 17 00:00:00 2001 From: Scot Hacker Date: Sat, 9 Mar 2019 17:59:35 -0800 Subject: [PATCH] Finish upsert code, and documentation --- README.md | 38 +++++++++- todo/data/import_example.csv | 2 +- todo/operations/csv_importer.py | 121 +++++++++++++++++++------------- 3 files changed, 111 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 5c15123..dcc5631 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,6 @@ The current django-todo version number is available from the [todo package](http python -c "import todo; print(todo.__version__)" - ## Upgrade Notes django-todo 2.0 was rebuilt almost from the ground up, and included some radical changes, including model name changes. As a result, it is *not compatible* with data from django-todo 1.x. If you would like to upgrade an existing installation, try this: @@ -166,6 +165,43 @@ django-todo uses pytest exclusively for testing. The best way to run the suite i The previous `tox` system was removed with the v2 release, since we no longer aim to support older Python or Django versions. + +# Importing Tasks via CSV + +django-todo has the ability to batch-import ("upsert") tasks from a specifically formatted CSV spreadsheet. This ability is provided through both a management command and the web interface. + +## Management Command + +`./manage.py import_csv -f /path/to/file.csv` + +## Web Importer + +Link from your navigation to `{url todo:import_csv}` + +## Import Logic + +Because data entered via CSV is not going through the same view permissions enforced in the rest of django-todo, and to simplify the logic of when to update vs create a record, etc., the importer will *not* create new users, groups, or task lists. All users, groups, and task lists referenced in your CSV must already exist, and memberships must be correct (if you have a row specifying a user in an incorrect group, the importer will skip that row). + +Any validation error (e.g. 
unparseable dates) results in that row being skipped. + +A report of rows upserted and rows skipped (with line numbers and reasons) is provided at the end of the run. + +## CSV Formatting + +Copy `todo/data/import_example.csv` to another location on your system and edit in a spreadsheet or directly. + +The "Created By", "Task List" and "Group" columns are required -- all others are optional and should work pretty much exactly like manual task entry via the web UI. + +Note: Internally, Tasks are keyed to TaskLists, not to Groups (TaskLists are in Groups). However, we request the Group in the CSV +because it's possible to have multiple TaskLists with the same name in different groups; i.e. we need it for namespacing and permissions. + +## Upsert Logic: + +For each valid row, we need to decide whether to create a new task or update an existing one. django-todo matches on the unique combination of Task List, Task Title, and Created By. If we find a task that matches those three, we *update* the rest of the columns. In other words, if you import a CSV once, then edit the Assigned To for a task and import it again, the original task will be updated with a new assignee (and the same for the other columns). + +Otherwise we create a new task. 
+ + # Version History **2.2.1** Convert task delete and toggle_done views to POST only diff --git a/todo/data/import_example.csv b/todo/data/import_example.csv index 8a312dd..998e778 100644 --- a/todo/data/import_example.csv +++ b/todo/data/import_example.csv @@ -1,5 +1,5 @@ Title,Group,Task List,Created Date,Due Date,Completed,Created By,Assigned To,Note,Priority -Make dinner,Scuba Divers,Groovy,2012-03-12,2012-03-14,No,shacker,shacker,This is as good as it gets,3 +Make dinner,Scuba Divers,Groovy,2012-03-12,2012-03-14,No,shacker,shacker,Temmo is a dog,3 Bake bread,Scuba Divers,Example List,2012-03-14,2012-03-14,,nonexistentusername,,, Eat food,Scuba Divers,Groovy,,2015-06-24,Yes,user1,user1,Every generation throws a hero up the pop charts,77 Be glad,Scuba Divers,Example List,2019-03-07,,,user3,user2,,1 diff --git a/todo/operations/csv_importer.py b/todo/operations/csv_importer.py index 7e1b7fc..13034ee 100644 --- a/todo/operations/csv_importer.py +++ b/todo/operations/csv_importer.py @@ -15,21 +15,13 @@ log = logging.getLogger(__name__) class CSVImporter: """Core upsert functionality for CSV import, for re-use by `import_csv` management command, web UI and tests. - For each row processed, first we try and get the correct related objects or set default values, then decide - on our upsert logic - create or update? We must enforce internal rules during object creation and take a SAFE - approache - for example - we shouldn't add a task if it specifies that a user is not a specified group. For that reason, it also doesn't - make sense to create new groups from here. In other words, the ingested CSV must accurately represent the current - database. Non-conforming rows are skipped and logged. Unlike manual task creation, we won't assume that the person - running this ingestion is the task creator - the creator must be specified, and a blank cell is an error. 
We also - do not create new lists - they must already exist (because if we did create new lists we'd also have to add the user to it, - etc.) - - Supplies a detailed log of what was and was not imported at the end.""" + Supplies a detailed log of what was and was not imported at the end. See README for usage notes. + """ def __init__(self): self.errors = [] self.line_count = 0 + self.upsert_count = 0 def upsert(self, filepath): @@ -38,29 +30,49 @@ class CSVImporter: sys.exit(1) with open(filepath, mode="r") as csv_file: - # Have arg and good file path -- read rows - # Inbound columns: + # Have arg and good file path -- read in rows as dicts. + # Header row is: # Title, Group, Task List, Created Date, Due Date, Completed, Created By, Assigned To, Note, Priority + print("\n") csv_reader = csv.DictReader(csv_file) for row in csv_reader: self.line_count += 1 - newrow = self.validate_row(row) # Copy so we can modify properties + newrow = self.validate_row(row) if newrow: - ic(newrow) - print("\n") + # newrow at this point is fully validated, and all FK relations exist, + # e.g. `newrow.get("Assigned To")`, is a Django User instance. + obj, created = Task.objects.update_or_create( + created_by=newrow.get("Created By"), + task_list=newrow.get("Task List"), + title=newrow.get("Title"), + defaults={ + "assigned_to": newrow.get("Assigned To"), + "completed": newrow.get("Completed"), + "created_date": newrow.get("Created Date"), + "due_date": newrow.get("Due Date"), + "note": newrow.get("Note"), + "priority": newrow.get("Priority"), + }, + ) + self.upsert_count += 1 + print( + f"Upserted task {obj.id}: \"{obj.title}\"" + f"in list \"{obj.task_list}\" (group \"{obj.task_list.group}\")" + ) # Report. 
Stored errors has the form: # self.errors = [{3: ["Incorrect foo", "Non-existent bar"]}, {7: [...]}] + print("\n") for error_dict in self.errors: for k, error_list in error_dict.items(): - print(f"Skipped row {k}:") + print(f"Skipped CSV row {k}:") for msg in error_list: print(f"\t{msg}") - print(f"\nProcessed {self.line_count} rows") - print(f"Inserted xxx rows") + print(f"\nProcessed {self.line_count} CSV rows") + print(f"Upserted {self.upsert_count} rows") def validate_row(self, row): """Perform data integrity checks and set default values. Returns a valid object for insertion, or False. @@ -68,6 +80,7 @@ class CSVImporter: row_errors = [] + # ####################### # Task creator must exist if not row.get("Created By"): msg = f"Missing required task creator." @@ -81,6 +94,7 @@ class CSVImporter: msg = f"Invalid task creator {row.get('Created By')}" row_errors.append(msg) + # ####################### # If specified, Assignee must exist if row.get("Assigned To"): assigned = get_user_model().objects.filter(username=row.get("Assigned To")) @@ -92,6 +106,7 @@ class CSVImporter: else: assignee = None # Perfectly valid + # ####################### # Group must exist try: target_group = Group.objects.get(name=row.get("Group")) @@ -99,53 +114,63 @@ class CSVImporter: msg = f"Could not find group {row.get('Group')}." 
row_errors.append(msg) + # ####################### # Task creator must be in the target group if creator and target_group not in creator.groups.all(): msg = f"{creator} is not in group {target_group}" row_errors.append(msg) + # ####################### # Assignee must be in the target group if assignee and target_group not in assignee.groups.all(): msg = f"{assignee} is not in group {target_group}" row_errors.append(msg) + # ####################### + # Task list must exist in the target group + try: + tasklist = TaskList.objects.get(name=row.get("Task List"), group=target_group) + row["Task List"] = tasklist + except TaskList.DoesNotExist: + msg = f"Task list {row.get('Task List')} in group {target_group} does not exist" + row_errors.append(msg) + + # ####################### + # Validate Due Date + dd = row.get("Due Date") + if dd: + try: + row["Due Date"] = datetime.datetime.strptime(dd, "%Y-%m-%d") + except ValueError: + msg = f"Could not convert Due Date {dd} to python date" + row_errors.append(msg) + else: + row["Created Date"] = None # Override default empty string '' value + + + # ####################### + # Validate Created Date + cd = row.get("Created Date") + if cd: + try: + row["Created Date"] = datetime.datetime.strptime(cd, "%Y-%m-%d") + except ValueError: + msg = f"Could not convert Created Date {cd} to python date" + row_errors.append(msg) + else: + row["Created Date"] = None # Override default empty string '' value + + # ####################### # Group membership checks have passed row["Created By"] = creator row["Group"] = target_group if assignee: row["Assigned To"] = assignee - # Task list must exist in the target group - try: - tasklist = TaskList.objects.get(name=row.get("Task List"), group=target_group) - row["Task List"] = tasklist - except TaskList.DoesNotExist: - msg = ( - f"Task list {row.get('Task List')} in group {target_group} does not exist" - ) - row_errors.append(msg) - - # Validate Due Date - dd = row.get("Due Date") - if dd: - try: - 
row["Due Date"] = datetime.datetime.strptime(dd, '%Y-%m-%d') - except ValueError: - msg = f"Could not convert Due Date {dd} to python date" - row_errors.append(msg) - - # Validate Created Date - cd = row.get("Created Date") - if cd: - try: - row["Created Date"] = datetime.datetime.strptime(cd, '%Y-%m-%d') - except ValueError: - msg = f"Could not convert Created Date {cd} to python date" - row_errors.append(msg) - - # Set Completed default + # Set Completed row["Completed"] = True if row.get("Completed") == "Yes" else False + # ####################### if row_errors: self.errors.append({self.line_count: row_errors}) return False