Import tasks via CSV (#51)
* Bare start on CSV support * Move core of CSV importer to operations * More validations, break out validation function * Validate dates and TaskList; convert errors to list of dictionaries * Finish upsert code, and documentation * Print msgs from the mgmt command, not the operations module * Handle BOM marks * Handle both in-memory and local file objects * Update readme * Working browser-upload view * Bail on incorrect headers * Fix default values and finish example spreadsheet * Change column order, update docs * Update index.md for RTD * First round of responses to PR feedback * Restore independent summaries/errors/upserts properties * PR responses * Split off reusable date validator into separate function * Fix URLs append * General test suite for CSV importer
This commit is contained in:
parent
184084c6a8
commit
4a99d90d1e
15 changed files with 599 additions and 15 deletions
4
todo/data/import_example.csv
Normal file
4
todo/data/import_example.csv
Normal file
|
@ -0,0 +1,4 @@
|
|||
Title,Group,Task List,Created By,Created Date,Due Date,Completed,Assigned To,Note,Priority
|
||||
Make dinner,Scuba Divers,Web project,shacker,,2019-06-14,No,,Please check with mgmt first,3
|
||||
Bake bread,Scuba Divers,Example List,mr_random,2012-03-14,,Yes,,,
|
||||
Bring dessert,Scuba Divers,Web project,user1,2015-06-248,,,user1,Every generation throws a hero up the pop charts,77
|
|
57
todo/management/commands/import_csv.py
Normal file
57
todo/management/commands/import_csv.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
import sys
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandParser
|
||||
|
||||
from todo.operations.csv_importer import CSVImporter
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that upserts tasks from a CSV file on local disk.

    The heavy lifting is delegated to `CSVImporter`; this command only resolves the
    file, hands an open text file object to the importer and prints its report.
    """

    help = """Import specifically formatted CSV file containing incoming tasks to be loaded.
    For specific format of inbound CSV, see data/import_example.csv.
    For documentation on upsert logic and required fields, see README.md.
    """

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the `-f/--file` option that names the CSV file to import."""
        parser.add_argument(
            "-f", "--file", dest="file", default=None, help="Path to inbound CSV file."
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """Import the CSV named by `--file` and print upserts, errors and summaries.

        Exits with status 1 when no file name is given or the file does not exist.
        """
        # Need a file to proceed
        if not options.get("file"):
            print("Sorry, we need a filename to work from.")
            sys.exit(1)

        filepath = Path(options["file"])

        if not filepath.exists():
            print(f"Sorry, couldn't find file: {filepath}")
            sys.exit(1)

        # Encoding "utf-8-sig" means "ignore byte order mark (BOM), which Excel inserts when saving CSVs."
        with filepath.open(mode="r", encoding="utf-8-sig") as fileobj:
            importer = CSVImporter()
            results = importer.upsert(fileobj, as_string_obj=True)

        # Report successes, failures and summaries
        print()
        for upsert_msg in results["upserts"]:
            print(upsert_msg)

        # Stored errors has the form:
        # [{3: ["Incorrect foo", "Non-existent bar"]}, {7: [...]}]
        # i.e. one single-key dict per skipped row, keyed by CSV row number.
        for error_dict in results["errors"]:
            for row_number, error_list in error_dict.items():
                print(f"\nSkipped CSV row {row_number}:")
                for msg in error_list:
                    print(f"- {msg}")

        print()
        for summary_msg in results["summaries"]:
            print(summary_msg)
|
22
todo/migrations/0009_priority_optional.py
Normal file
22
todo/migrations/0009_priority_optional.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Generated by Django 2.1.7 on 2019-03-18 23:14
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Allow `Task.priority` to be empty and order tasks by priority, then created date."""

    dependencies = [("todo", "0008_mail_tracker")]

    operations = [
        # Default ordering gains `created_date` as a secondary sort key.
        migrations.AlterModelOptions(
            name="task",
            options={"ordering": ["priority", "created_date"]},
        ),
        # Priority becomes optional (nullable, and may be left blank in forms).
        migrations.AlterField(
            model_name="task",
            name="priority",
            field=models.PositiveIntegerField(blank=True, null=True),
        ),
    ]
|
|
@ -82,7 +82,7 @@ class Task(models.Model):
|
|||
on_delete=models.CASCADE,
|
||||
)
|
||||
note = models.TextField(blank=True, null=True)
|
||||
priority = models.PositiveIntegerField()
|
||||
priority = models.PositiveIntegerField(blank=True, null=True)
|
||||
|
||||
# Has due date for an instance of this object passed?
|
||||
def overdue_status(self):
|
||||
|
@ -115,7 +115,7 @@ class Task(models.Model):
|
|||
self.delete()
|
||||
|
||||
class Meta:
|
||||
ordering = ["priority"]
|
||||
ordering = ["priority", "created_date"]
|
||||
|
||||
|
||||
class Comment(models.Model):
|
||||
|
|
0
todo/operations/__init__.py
Normal file
0
todo/operations/__init__.py
Normal file
197
todo/operations/csv_importer.py
Normal file
197
todo/operations/csv_importer.py
Normal file
|
@ -0,0 +1,197 @@
|
|||
import codecs
|
||||
import csv
|
||||
import datetime
|
||||
import logging
|
||||
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.contrib.auth.models import Group
|
||||
|
||||
from todo.models import Task, TaskList
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CSVImporter:
    """Core upsert functionality for CSV import, for re-use by `import_csv` management command, web UI and tests.

    Supplies a detailed log of what was and was not imported at the end. See README for usage notes.

    Results accumulate on the instance, so use a fresh `CSVImporter` per import:
      - `summaries`: list of human-readable summary strings
      - `upserts`: list of one message per task created or updated
      - `errors`: list of single-key dicts, `{csv_row_number: [error message, ...]}`
    """

    # Exact header row (names and order) that an inbound CSV must start with.
    EXPECTED_HEADER = [
        "Title",
        "Group",
        "Task List",
        "Created By",
        "Created Date",
        "Due Date",
        "Completed",
        "Assigned To",
        "Note",
        "Priority",
    ]

    def __init__(self):
        self.errors = []
        self.upserts = []
        self.summaries = []
        self.line_count = 0
        self.upsert_count = 0

    def _results(self):
        """Return the accumulated report in the dict shape callers expect."""
        return {"summaries": self.summaries, "upserts": self.upserts, "errors": self.errors}

    def upsert(self, fileobj, as_string_obj=False):
        """Create or update one task per valid CSV row and return a report dict.

        Expects a file *object*, not a file path. This is important because this has to work for both
        the management command and the web uploader; the web uploader will pass in an in-memory file
        with no path!

        Args:
            fileobj: An iterable of text lines when `as_string_obj` is True (e.g. a file opened in
                text mode), otherwise an iterable of UTF-8 encoded byte lines (e.g. a browser upload).
            as_string_obj: Whether `fileobj` already yields decoded strings.

        Returns:
            Dict with keys "summaries", "upserts" and "errors". If the header row is not exactly
            the expected one, nothing is imported and "summaries" explains why.

        Header row is:
        Title, Group, Task List, Created By, Created Date, Due Date, Completed, Assigned To, Note, Priority
        """

        if as_string_obj:
            # fileobj comes from mgmt command
            csv_reader = csv.DictReader(fileobj)
        else:
            # fileobj comes from browser upload (in-memory). "utf-8-sig" decodes plain UTF-8 and
            # also drops a leading byte order mark, which would otherwise corrupt the first header.
            csv_reader = csv.DictReader(codecs.iterdecode(fileobj, "utf-8-sig"))

        # DI check: Do we have expected header row?
        header = csv_reader.fieldnames
        expected = self.EXPECTED_HEADER
        if header != expected:
            self.summaries.append(
                f"Inbound data does not have expected columns.\nShould be: {expected}"
            )
            return self._results()

        for row in csv_reader:
            self.line_count += 1

            newrow = self.validate_row(row)
            if not newrow:
                # Errors for this row were recorded by validate_row.
                continue

            # newrow at this point is fully validated, and all FK relations exist,
            # e.g. `newrow.get("Assigned To")`, is a Django User instance.
            # Empty CSV cells arrive as "" and are normalized to None / today's date.
            assignee = newrow.get("Assigned To") or None
            created_date = newrow.get("Created Date") or datetime.datetime.today()
            due_date = newrow.get("Due Date") or None
            priority = newrow.get("Priority") or None

            # A task is identified by creator + task list + title; everything else is updatable.
            obj, created = Task.objects.update_or_create(
                created_by=newrow.get("Created By"),
                task_list=newrow.get("Task List"),
                title=newrow.get("Title"),
                defaults={
                    "assigned_to": assignee,
                    "completed": newrow.get("Completed"),
                    "created_date": created_date,
                    "due_date": due_date,
                    "note": newrow.get("Note"),
                    "priority": priority,
                },
            )
            self.upsert_count += 1
            msg = (
                f'Upserted task {obj.id}: "{obj.title}"'
                f' in list "{obj.task_list}" (group "{obj.task_list.group}")'
            )
            self.upserts.append(msg)

        self.summaries.append(f"Processed {self.line_count} CSV rows")
        self.summaries.append(f"Upserted {self.upsert_count} rows")
        self.summaries.append(f"Skipped {self.line_count - self.upsert_count} rows")

        return self._results()

    def validate_row(self, row):
        """Perform data integrity checks and set default values. Returns a valid object for insertion, or False.

        On success the returned row has "Created By", "Assigned To" (if given), "Group" and
        "Task List" replaced by model instances, date strings replaced by datetime objects and
        "Completed" replaced by a bool. On failure the row's errors are appended to `self.errors`
        under the current `self.line_count`.

        Errors are stored for later display. Intentionally not broken up into separate validator functions because
        there are interdependencies, such as checking for existing `creator` in one place and then using
        that creator for group membership check in others."""

        row_errors = []
        user_model = get_user_model()

        # #######################
        # Task creator must exist
        creator = None
        creator_name = row.get("Created By")
        if creator_name:
            creator = user_model.objects.filter(username=creator_name).first()
            if not creator:
                row_errors.append(f"Invalid task creator {creator_name}")
        else:
            row_errors.append("Missing required task creator.")

        # #######################
        # If specified, Assignee must exist
        assignee = None  # Perfectly valid
        assignee_name = row.get("Assigned To")
        if assignee_name:
            assignee = user_model.objects.filter(username=assignee_name).first()
            if not assignee:
                row_errors.append(f"Missing or invalid task assignee {assignee_name}")

        # #######################
        # Group must exist
        try:
            target_group = Group.objects.get(name=row.get("Group"))
        except Group.DoesNotExist:
            row_errors.append(f"Could not find group {row.get('Group')}.")
            target_group = None

        # Membership can only be judged against a group that exists; the missing
        # group has already been reported above.
        if target_group is not None:
            # #######################
            # Task creator must be in the target group
            if creator and target_group not in creator.groups.all():
                row_errors.append(f"{creator} is not in group {target_group}")

            # #######################
            # Assignee must be in the target group
            if assignee and target_group not in assignee.groups.all():
                row_errors.append(f"{assignee} is not in group {target_group}")

        # #######################
        # Task list must exist in the target group
        try:
            tasklist = TaskList.objects.get(name=row.get("Task List"), group=target_group)
            row["Task List"] = tasklist
        except TaskList.DoesNotExist:
            row_errors.append(
                f"Task list {row.get('Task List')} in group {target_group} does not exist"
            )

        # #######################
        # Validate Dates
        for datefield in ("Due Date", "Created Date"):
            datestring = row.get(datefield)
            if not datestring:
                continue
            valid_date = self.validate_date(datestring)
            if valid_date:
                row[datefield] = valid_date
            else:
                row_errors.append(
                    f"Could not convert {datefield} {datestring} to valid date instance"
                )

        # #######################
        # Swap validated names for the objects they refer to
        row["Created By"] = creator
        row["Group"] = target_group
        if assignee:
            row["Assigned To"] = assignee

        # Set Completed: only the literal "Yes" counts as completed.
        row["Completed"] = row["Completed"] == "Yes"

        # #######################
        if row_errors:
            self.errors.append({self.line_count: row_errors})
            return False

        # No errors:
        return row

    def validate_date(self, datestring):
        """Parse a `YYYY-MM-DD` string from the CSV.

        Returns a `datetime.datetime` at midnight of that day, or False if the
        string is not a valid date in that format."""
        try:
            return datetime.datetime.strptime(datestring, "%Y-%m-%d")
        except ValueError:
            return False
|
85
todo/templates/todo/import_csv.html
Normal file
85
todo/templates/todo/import_csv.html
Normal file
|
@ -0,0 +1,85 @@
|
|||
{% extends "todo/base.html" %}
|
||||
{% load static %}
|
||||
|
||||
{% block title %}Import CSV{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<h2>
|
||||
Import CSV
|
||||
</h2>
|
||||
|
||||
<p>
|
||||
Batch-import tasks by uploading a specifically-formatted CSV.
|
||||
See documentation for formatting rules.
|
||||
Successes and failures will be reported here.
|
||||
</p>
|
||||
|
||||
{% if results %}
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
Results of CSV upload
|
||||
</div>
|
||||
<div class="card-body">
|
||||
|
||||
{% if results.summaries %}
|
||||
<p>
|
||||
<b>Summary:</b>
|
||||
</p>
|
||||
<ul>
|
||||
{% for line in results.summaries %}
|
||||
<li>{{ line }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
{% if results.upserts %}
|
||||
<p>
|
||||
<b>Upserts (tasks created or updated):</b>
|
||||
</p>
|
||||
<ul>
|
||||
{% for line in results.upserts %}
|
||||
<li>{{ line }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
{% if results.errors %}
|
||||
<p>
|
||||
<b>Errors (tasks NOT created or updated):</b>
|
||||
</p>
|
||||
<ul>
|
||||
{% for error_row in results.errors %}
|
||||
{% for k, error_list in error_row.items %}
|
||||
<li>CSV row {{ k }}</li>
|
||||
<ul>
|
||||
{% for err in error_list %}
|
||||
<li>{{ err }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
Upload Tasks
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<form method="post" enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<div>
|
||||
<input type="file" name="csvfile" accept="text/csv">
|
||||
</div>
|
||||
<button type="submit" class="btn btn-primary mt-4">Upload</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
4
todo/tests/data/csv_import_data.csv
Normal file
4
todo/tests/data/csv_import_data.csv
Normal file
|
@ -0,0 +1,4 @@
|
|||
Title,Group,Task List,Created By,Created Date,Due Date,Completed,Assigned To,Note,Priority
|
||||
Make dinner,Workgroup One,Zip,u1,,2019-06-14,No,u1,This is note one,3
|
||||
Bake bread,Workgroup One,Zip,u1,2012-03-14,,Yes,,,
|
||||
Bring dessert,Workgroup Two,Zap,u2,2015-06-248,,,,This is note two,77
|
|
76
todo/tests/test_import.py
Normal file
76
todo/tests/test_import.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth import get_user_model
|
||||
|
||||
from todo.models import Task, TaskList
|
||||
from todo.operations.csv_importer import CSVImporter
|
||||
|
||||
|
||||
"""
|
||||
Exercise the "Import CSV" feature, which shares a functional module that serves
|
||||
both the `import_csv` management command and the "Import CSV" web interface.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture
def import_setup(todo_setup):
    """Import the sample CSV on top of the `todo_setup` data and return the importer's report.

    No `django_db` mark here: marks applied to fixtures have no effect, so database
    access must come from the mark on each test that requests this fixture.

    Returns:
        `{"results": <dict returned by CSVImporter.upsert>}`
    """
    app_path = Path(__file__).resolve().parent.parent
    filepath = Path(app_path, "tests/data/csv_import_data.csv")
    # "utf-8-sig" mirrors how the management command opens files (tolerates a BOM).
    with filepath.open(mode="r", encoding="utf-8-sig") as fileobj:
        importer = CSVImporter()
        results = importer.upsert(fileobj, as_string_obj=True)
        assert results
        return {"results": results}
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_setup(todo_setup):
    """Check the baseline object counts provided by conftest before any CSV import."""
    list_total = TaskList.objects.all().count()
    assert list_total == 2

    task_total = Task.objects.all().count()
    assert task_total == 6
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_import(import_setup):
    """The sample CSV has three rows; two import cleanly and one is rejected."""
    # Baseline of 6 tasks plus the 2 successfully imported rows.
    task_total = Task.objects.all().count()
    assert task_total == 8
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_report(import_setup):
    """The importer's report carries the expected summary, error and upsert messages."""
    report = import_setup["results"]

    # Summary lines
    summaries = report["summaries"]
    for expected_line in ("Processed 3 CSV rows", "Upserted 2 rows", "Skipped 1 rows"):
        assert expected_line in summaries

    # Exactly one row failed, and it is CSV row 3 (bad Created Date).
    errors = report["errors"]
    assert isinstance(errors, list)
    assert len(errors) == 1
    first_row3_error = errors[0].get(3)[0]
    assert first_row3_error == "Could not convert Created Date 2015-06-248 to valid date instance"

    # One message per upserted task
    upserts = report["upserts"]
    assert 'Upserted task 7: "Make dinner" in list "Zip" (group "Workgroup One")' in upserts
    assert 'Upserted task 8: "Bake bread" in list "Zip" (group "Workgroup One")' in upserts
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_inserted_row(import_setup):
    """Every field of the imported "Make dinner" task matches its CSV row."""
    task = Task.objects.get(title="Make dinner", task_list__name="Zip")
    user_model = get_user_model()

    creator = user_model.objects.get(username="u1")
    assert task.created_by == creator

    assignee = user_model.objects.get(username="u1")
    assert task.assigned_to == assignee

    assert not task.completed
    assert task.note == "This is note one"
    assert task.priority == 3

    # The CSV leaves Created Date empty, so the importer falls back to today.
    today = datetime.datetime.today().date()
    assert task.created_date == today
|
11
todo/urls.py
11
todo/urls.py
|
@ -1,10 +1,10 @@
|
|||
from django.conf import settings
|
||||
from django.urls import path
|
||||
|
||||
from todo import views
|
||||
from todo.features import HAS_TASK_MERGE
|
||||
app_name = 'todo'
|
||||
|
||||
from django.conf import settings
|
||||
app_name = 'todo'
|
||||
|
||||
urlpatterns = [
|
||||
path(
|
||||
|
@ -59,7 +59,7 @@ urlpatterns = [
|
|||
]
|
||||
|
||||
if HAS_TASK_MERGE:
|
||||
# ensure autocomplete is optional
|
||||
# ensure mail tracker autocomplete is optional
|
||||
from todo.views.task_autocomplete import TaskAutocomplete
|
||||
urlpatterns.append(
|
||||
path(
|
||||
|
@ -83,4 +83,9 @@ urlpatterns.extend([
|
|||
'search/',
|
||||
views.search,
|
||||
name="search"),
|
||||
|
||||
path(
|
||||
'import_csv/',
|
||||
views.import_csv,
|
||||
name="import_csv"),
|
||||
])
|
||||
|
|
|
@ -8,3 +8,4 @@ from todo.views.reorder_tasks import reorder_tasks # noqa: F401
|
|||
from todo.views.search import search # noqa: F401
|
||||
from todo.views.task_detail import task_detail # noqa: F401
|
||||
from todo.views.toggle_done import toggle_done # noqa: F401
|
||||
from todo.views.import_csv import import_csv # noqa: F401
|
||||
|
|
22
todo/views/import_csv.py
Normal file
22
todo/views/import_csv.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
from django.contrib.auth.decorators import login_required, user_passes_test
|
||||
from django.http import HttpResponse
|
||||
from django.shortcuts import render
|
||||
from todo.operations.csv_importer import CSVImporter
|
||||
|
||||
from todo.utils import staff_check
|
||||
|
||||
@login_required
@user_passes_test(staff_check)
def import_csv(request) -> HttpResponse:
    """Import a specifically formatted CSV into stored tasks.

    GET renders the upload form. POST runs the uploaded "csvfile" through
    `CSVImporter` and re-renders the page with the importer's report in
    `results` (keys "summaries", "upserts", "errors"). A POST without a file
    is reported as a summary message rather than passed to the importer.
    """

    ctx = {}

    if request.method == "POST":
        # This is an uploaded file *object* (in-memory or temp file), not a path.
        csvfile = request.FILES.get("csvfile")
        if csvfile is None:
            # Submitting the form without choosing a file would otherwise hand
            # None to the importer and fail with a server error.
            ctx["results"] = {
                "summaries": ["No CSV file was uploaded. Please choose a file and try again."],
                "upserts": [],
                "errors": [],
            }
        else:
            importer = CSVImporter()
            ctx["results"] = importer.upsert(csvfile)

    return render(request, "todo/import_csv.html", context=ctx)
|
Loading…
Add table
Add a link
Reference in a new issue