Import tasks via CSV (#51)

* Bare start on CSV support

* Move core of CSV importer to operations

* More validations, break out validation function

* Validate dates and TaskList; convert errors to list of dictionaries

* Finish upsert code, and documentation

* Print msgs from the mgmt command, not the operations module

* Handle BOM marks

* Handle both in-memory and local file objects

* Update readme

* Working browser-upload view

* Bail on incorrect headers

* Fix default values and finish example spreadsheet

* Change column order, update docs

* Update index.md for RTD

* First round of responses to PR feedback

* Restore independent summaries/errors/upserts properties

* PR responses

* Split off reusable date validator into separate function

* Fix URLs append

* General test suite for CSV importer
This commit is contained in:
Scot Hacker 2019-03-25 23:19:11 -07:00 committed by GitHub
parent 184084c6a8
commit 4a99d90d1e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 599 additions and 15 deletions

View file

@ -0,0 +1,4 @@
Title,Group,Task List,Created By,Created Date,Due Date,Completed,Assigned To,Note,Priority
Make dinner,Scuba Divers,Web project,shacker,,2019-06-14,No,,Please check with mgmt first,3
Bake bread,Scuba Divers,Example List,mr_random,2012-03-14,,Yes,,,
Bring dessert,Scuba Divers,Web project,user1,2015-06-248,,,user1,Every generation throws a hero up the pop charts,77
1 Title Group Task List Created By Created Date Due Date Completed Assigned To Note Priority
2 Make dinner Scuba Divers Web project shacker 2019-06-14 No Please check with mgmt first 3
3 Bake bread Scuba Divers Example List mr_random 2012-03-14 Yes
4 Bring dessert Scuba Divers Web project user1 2015-06-248 user1 Every generation throws a hero up the pop charts 77

View file

@ -0,0 +1,57 @@
import sys
from typing import Any
from pathlib import Path
from django.core.management.base import BaseCommand, CommandParser
from todo.operations.csv_importer import CSVImporter
class Command(BaseCommand):
    """Management command that upserts tasks from a local CSV file.

    The actual import logic lives in `CSVImporter`; this command only resolves
    the file path, hands the opened file to the importer and prints the
    returned report (upserts, per-row errors, summaries).
    """

    help = """Import specifically formatted CSV file containing incoming tasks to be loaded.
    For specific format of inbound CSV, see data/import_example.csv.
    For documentation on upsert logic and required fields, see README.md.
    """

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the `-f/--file` option that points at the CSV to import."""
        parser.add_argument(
            "-f", "--file", dest="file", default=None, help="Path to inbound CSV file."
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """Import the CSV named by `--file` and print the importer's report.

        Exits with status 1 when no file was given or the path is not a
        readable regular file.
        """
        # Need a file to proceed
        if not options.get("file"):
            print("Sorry, we need a filename to work from.")
            sys.exit(1)

        filepath = Path(options["file"])

        # is_file() rather than exists(): a directory exists but cannot be opened as a CSV.
        if not filepath.is_file():
            print(f"Sorry, couldn't find file: {filepath}")
            sys.exit(1)

        # Encoding "utf-8-sig" means "ignore byte order mark (BOM), which Excel inserts when saving CSVs."
        with filepath.open(mode="r", encoding="utf-8-sig") as fileobj:
            importer = CSVImporter()
            results = importer.upsert(fileobj, as_string_obj=True)

        # Report successes, failures and summaries
        print()
        for upsert_msg in results["upserts"]:
            print(upsert_msg)

        # Stored errors has the form:
        # [{3: ["Incorrect foo", "Non-existent bar"]}, {7: [...]}]
        for error_dict in results["errors"]:
            for k, error_list in error_dict.items():
                print(f"\nSkipped CSV row {k}:")
                for msg in error_list:
                    print(f"- {msg}")

        print()
        for summary_msg in results["summaries"]:
            print(summary_msg)

View file

@ -0,0 +1,22 @@
# Generated by Django 2.1.7 on 2019-03-18 23:14
from django.db import migrations, models
class Migration(migrations.Migration):
    """Make `Task.priority` optional and order tasks by priority, then creation date."""

    dependencies = [("todo", "0008_mail_tracker")]

    operations = [
        # Default ordering: priority first, created_date second.
        migrations.AlterModelOptions(
            name="task",
            options={"ordering": ["priority", "created_date"]},
        ),
        # Priority may now be left blank / stored as NULL.
        migrations.AlterField(
            model_name="task",
            name="priority",
            field=models.PositiveIntegerField(blank=True, null=True),
        ),
    ]

View file

@ -82,7 +82,7 @@ class Task(models.Model):
on_delete=models.CASCADE,
)
note = models.TextField(blank=True, null=True)
priority = models.PositiveIntegerField()
priority = models.PositiveIntegerField(blank=True, null=True)
# Has due date for an instance of this object passed?
def overdue_status(self):
@ -115,7 +115,7 @@ class Task(models.Model):
self.delete()
class Meta:
ordering = ["priority"]
ordering = ["priority", "created_date"]
class Comment(models.Model):

View file

View file

@ -0,0 +1,197 @@
import codecs
import csv
import datetime
import logging
from django.contrib.auth import get_user_model
from django.contrib.auth.models import Group
from todo.models import Task, TaskList
log = logging.getLogger(__name__)
class CSVImporter:
    """Core upsert functionality for CSV import.

    Re-used by the `import_csv` management command, the web UI and the tests.
    Collects a detailed log of what was and was not imported; `upsert()`
    returns that log as a dict with the keys "summaries", "upserts" and
    "errors". See README for usage notes.
    """

    # Exact header row (names and order) that every inbound CSV must start with.
    EXPECTED_HEADER = [
        "Title",
        "Group",
        "Task List",
        "Created By",
        "Created Date",
        "Due Date",
        "Completed",
        "Assigned To",
        "Note",
        "Priority",
    ]

    def __init__(self):
        self.errors = []  # [{csv_row_number: ["message", ...]}, ...]
        self.upserts = []  # one human-readable message per upserted task
        self.summaries = []  # overall report lines
        self.line_count = 0  # data rows read so far (header excluded)
        self.upsert_count = 0  # rows successfully created/updated

    def _results(self):
        """Return the report dict shared by every exit path of `upsert()`."""
        return {"summaries": self.summaries, "upserts": self.upserts, "errors": self.errors}

    def upsert(self, fileobj, as_string_obj=False):
        """Create or update one task per valid CSV row and return the report.

        Expects a file *object*, not a file path. This is important because this
        has to work for both the management command and the web uploader; the web
        uploader will pass in an in-memory file with no path!

        Header row is:
        Title, Group, Task List, Created By, Created Date, Due Date, Completed, Assigned To, Note, Priority

        Args:
            fileobj: iterable of CSV lines. Text lines when `as_string_obj` is
                true, otherwise UTF-8 encoded bytes (e.g. a browser upload).
            as_string_obj: set when `fileobj` already yields decoded strings.

        Returns:
            dict with "summaries", "upserts" and "errors" lists. If the header
            row is wrong, nothing is imported and "summaries" holds a single
            explanatory message.
        """
        if as_string_obj:
            # fileobj comes from mgmt command
            csv_reader = csv.DictReader(fileobj)
        else:
            # fileobj comes from browser upload (in-memory, bytes).
            # "utf-8-sig" also strips the BOM that Excel writes, which would
            # otherwise end up in the first column name and fail the header check.
            csv_reader = csv.DictReader(codecs.iterdecode(fileobj, "utf-8-sig"))

        # DI check: Do we have expected header row? (None for an empty file.)
        header = csv_reader.fieldnames
        if header != self.EXPECTED_HEADER:
            self.summaries.append(
                f"Inbound data does not have expected columns.\nShould be: {self.EXPECTED_HEADER}"
            )
            return self._results()

        for row in csv_reader:
            self.line_count += 1

            newrow = self.validate_row(row)
            if not newrow:
                continue

            # newrow at this point is fully validated, and all FK relations exist,
            # e.g. `newrow.get("Assigned To")`, is a Django User instance.
            priority = newrow.get("Priority")
            if priority == "":
                priority = None

            obj, _created = Task.objects.update_or_create(
                created_by=newrow.get("Created By"),
                task_list=newrow.get("Task List"),
                title=newrow.get("Title"),
                defaults={
                    "assigned_to": newrow.get("Assigned To") or None,
                    "completed": newrow.get("Completed"),
                    "created_date": newrow.get("Created Date") or datetime.datetime.today(),
                    "due_date": newrow.get("Due Date") or None,
                    "note": newrow.get("Note"),
                    "priority": priority,
                },
            )
            self.upsert_count += 1
            msg = (
                f'Upserted task {obj.id}: "{obj.title}"'
                f' in list "{obj.task_list}" (group "{obj.task_list.group}")'
            )
            self.upserts.append(msg)

        self.summaries.append(f"Processed {self.line_count} CSV rows")
        self.summaries.append(f"Upserted {self.upsert_count} rows")
        self.summaries.append(f"Skipped {self.line_count - self.upsert_count} rows")

        return self._results()

    def validate_row(self, row):
        """Perform data integrity checks and set default values.

        Returns the row (mutated in place: usernames, group, task list and dates
        replaced by model instances / datetime objects, "Completed" by a bool,
        "Priority" by an int or None) when it is valid, or False otherwise.
        Errors are stored in `self.errors` for later display.

        Intentionally not broken up into separate validator functions because
        there are interdependencies, such as checking for existing `creator` in
        one place and then using that creator for group membership check in others.
        """
        row_errors = []
        user_model = get_user_model()

        # #######################
        # Task creator must exist
        creator = None
        creator_name = row.get("Created By")
        if not creator_name:
            row_errors.append("Missing required task creator.")
        else:
            creator = user_model.objects.filter(username=creator_name).first()
            if not creator:
                row_errors.append(f"Invalid task creator {creator_name}")

        # #######################
        # If specified, Assignee must exist
        assignee = None  # Perfectly valid
        assignee_name = row.get("Assigned To")
        if assignee_name:
            assignee = user_model.objects.filter(username=assignee_name).first()
            if not assignee:
                row_errors.append(f"Missing or invalid task assignee {assignee_name}")

        # #######################
        # Group must exist
        try:
            target_group = Group.objects.get(name=row.get("Group"))
        except Group.DoesNotExist:
            target_group = None
            row_errors.append(f"Could not find group {row.get('Group')}.")

        # The remaining group-dependent checks are only meaningful when the group
        # was found; otherwise they would just repeat the error above as
        # "... group None".
        if target_group is not None:
            # #######################
            # Task creator must be in the target group
            if creator and target_group not in creator.groups.all():
                row_errors.append(f"{creator} is not in group {target_group}")

            # #######################
            # Assignee must be in the target group
            if assignee and target_group not in assignee.groups.all():
                row_errors.append(f"{assignee} is not in group {target_group}")

            # #######################
            # Task list must exist in the target group
            try:
                tasklist = TaskList.objects.get(name=row.get("Task List"), group=target_group)
                row["Task List"] = tasklist
            except TaskList.DoesNotExist:
                row_errors.append(
                    f"Task list {row.get('Task List')} in group {target_group} does not exist"
                )

        # #######################
        # Validate Dates
        for datefield in ("Due Date", "Created Date"):
            datestring = row.get(datefield)
            if datestring:
                valid_date = self.validate_date(datestring)
                if valid_date:
                    row[datefield] = valid_date
                else:
                    row_errors.append(
                        f"Could not convert {datefield} {datestring} to valid date instance"
                    )

        # #######################
        # If specified, Priority must be a non-negative integer. Checking here
        # turns a bad value into a row error instead of a crash when saving.
        raw_priority = row.get("Priority")
        if raw_priority:
            try:
                priority = int(raw_priority)
            except (TypeError, ValueError):
                priority = None
            if priority is None or priority < 0:
                row_errors.append(
                    f"Could not convert Priority {raw_priority} to a non-negative integer"
                )
            else:
                row["Priority"] = priority
        else:
            row["Priority"] = None

        # #######################
        # Group membership checks have passed
        row["Created By"] = creator
        row["Group"] = target_group
        if assignee:
            row["Assigned To"] = assignee

        # Set Completed: only the literal "Yes" marks a task as done.
        row["Completed"] = row.get("Completed") == "Yes"

        # #######################
        if row_errors:
            self.errors.append({self.line_count: row_errors})
            return False

        # No errors:
        return row

    def validate_date(self, datestring):
        """Parse a `YYYY-MM-DD` string from the CSV.

        Returns a `datetime.datetime` on success, or False when the string does
        not match that format.
        """
        try:
            return datetime.datetime.strptime(datestring, "%Y-%m-%d")
        except ValueError:
            return False

View file

@ -0,0 +1,85 @@
{% extends "todo/base.html" %}
{% load static %}
{% block title %}Import CSV{% endblock %}
{% block content %}
<h2>
Import CSV
</h2>
<p>
Batch-import tasks by uploading a specifically-formatted CSV.
See documentation for formatting rules.
Successes and failures will be reported here.
</p>
{% if results %}
<div class="card mb-4">
<div class="card-header">
Results of CSV upload
</div>
<div class="card-body">
{% if results.summaries %}
<p>
<b>Summary:</b>
</p>
<ul>
{% for line in results.summaries %}
<li>{{ line }}</li>
{% endfor %}
</ul>
{% endif %}
{% if results.upserts %}
<p>
<b>Upserts (tasks created or updated):</b>
</p>
<ul>
{% for line in results.upserts %}
<li>{{ line }}</li>
{% endfor %}
</ul>
{% endif %}
{% if results.errors %}
<p>
<b>Errors (tasks NOT created or updated):</b>
</p>
<ul>
{% for error_row in results.errors %}
{% for k, error_list in error_row.items %}
<li>CSV row {{ k }}
<ul>
{% for err in error_list %}
<li>{{ err }}</li>
{% endfor %}
</ul>
</li>
{% endfor %}
{% endfor %}
</ul>
{% endif %}
</div>
</div>
{% endif %}
<div class="card">
<div class="card-header">
Upload Tasks
</div>
<div class="card-body">
<form method="post" enctype="multipart/form-data">
{% csrf_token %}
<div>
<input type="file" name="csvfile" accept="text/csv">
</div>
<button type="submit" class="btn btn-primary mt-4">Upload</button>
</form>
</div>
</div>
{% endblock %}

View file

@ -0,0 +1,4 @@
Title,Group,Task List,Created By,Created Date,Due Date,Completed,Assigned To,Note,Priority
Make dinner,Workgroup One,Zip,u1,,2019-06-14,No,u1,This is note one,3
Bake bread,Workgroup One,Zip,u1,2012-03-14,,Yes,,,
Bring dessert,Workgroup Two,Zap,u2,2015-06-248,,,,This is note two,77
1 Title Group Task List Created By Created Date Due Date Completed Assigned To Note Priority
2 Make dinner Workgroup One Zip u1 2019-06-14 No u1 This is note one 3
3 Bake bread Workgroup One Zip u1 2012-03-14 Yes
4 Bring dessert Workgroup Two Zap u2 2015-06-248 This is note two 77

76
todo/tests/test_import.py Normal file
View file

@ -0,0 +1,76 @@
import datetime
from pathlib import Path
import pytest
from django.contrib.auth import get_user_model
from todo.models import Task, TaskList
from todo.operations.csv_importer import CSVImporter
"""
Exercise the "Import CSV" feature, which shares a functional module that serves
both the `import_csv` management command and the "Import CSV" web interface.
"""
@pytest.fixture
def import_setup(todo_setup):
    """Import the sample CSV and return the importer's report.

    Depends on `todo_setup` for the pre-existing data the CSV rows refer to
    (presumably the users, groups and lists named in the CSV; defined in
    conftest). No `django_db` mark here: pytest marks have no effect on
    fixtures, so database access comes from the mark on each requesting test.

    Returns:
        {"results": <dict returned by CSVImporter.upsert()>}
    """
    app_path = Path(__file__).resolve().parent.parent
    filepath = Path(app_path, "tests/data/csv_import_data.csv")
    # "utf-8-sig" drops a leading byte order mark if the file has one.
    with filepath.open(mode="r", encoding="utf-8-sig") as fileobj:
        importer = CSVImporter()
        results = importer.upsert(fileobj, as_string_obj=True)
    assert results
    return {"results": results}
@pytest.mark.django_db
def test_setup(todo_setup):
    """Sanity-check the baseline row counts conftest provides before any CSV import."""
    tasklist_total = TaskList.objects.count()
    task_total = Task.objects.count()
    assert tasklist_total == 2
    assert task_total == 6
@pytest.mark.django_db
def test_import(import_setup):
    """After the import there are two more tasks; the third CSV row is rejected."""
    # 2 out of 3 rows should have imported; one was an error
    task_total = Task.objects.count()
    assert task_total == 8
@pytest.mark.django_db
def test_report(import_setup):
    """Check the summaries, errors and upsert messages reported by the import."""
    report = import_setup["results"]
    summaries = report["summaries"]
    errors = report["errors"]
    upserts = report["upserts"]

    # Overall counts: three rows read, two written, one rejected.
    for expected_line in ("Processed 3 CSV rows", "Upserted 2 rows", "Skipped 1 rows"):
        assert expected_line in summaries

    # Exactly one row failed: CSV row 3, because of its malformed created date.
    assert isinstance(errors, list)
    assert len(errors) == 1
    row_three_errors = errors[0].get(3)
    assert (
        row_three_errors[0]
        == "Could not convert Created Date 2015-06-248 to valid date instance"
    )

    # The two valid rows are reported with their new task ids.
    for expected_msg in (
        'Upserted task 7: "Make dinner" in list "Zip" (group "Workgroup One")',
        'Upserted task 8: "Bake bread" in list "Zip" (group "Workgroup One")',
    ):
        assert expected_msg in upserts
@pytest.mark.django_db
def test_inserted_row(import_setup):
    """Verify every imported field of the "Make dinner" task."""
    user_u1 = get_user_model().objects.get(username="u1")
    dinner = Task.objects.get(title="Make dinner", task_list__name="Zip")

    assert dinner.created_by == user_u1
    assert dinner.assigned_to == user_u1
    assert not dinner.completed
    assert dinner.note == "This is note one"
    assert dinner.priority == 3
    # The CSV row has no created date, so the import falls back to today.
    assert dinner.created_date == datetime.date.today()

View file

@ -1,10 +1,10 @@
from django.conf import settings
from django.urls import path
from todo import views
from todo.features import HAS_TASK_MERGE
app_name = 'todo'
from django.conf import settings
app_name = 'todo'
urlpatterns = [
path(
@ -59,7 +59,7 @@ urlpatterns = [
]
if HAS_TASK_MERGE:
# ensure autocomplete is optional
# ensure mail tracker autocomplete is optional
from todo.views.task_autocomplete import TaskAutocomplete
urlpatterns.append(
path(
@ -83,4 +83,9 @@ urlpatterns.extend([
'search/',
views.search,
name="search"),
path(
'import_csv/',
views.import_csv,
name="import_csv"),
])

View file

@ -8,3 +8,4 @@ from todo.views.reorder_tasks import reorder_tasks # noqa: F401
from todo.views.search import search # noqa: F401
from todo.views.task_detail import task_detail # noqa: F401
from todo.views.toggle_done import toggle_done # noqa: F401
from todo.views.import_csv import import_csv # noqa: F401

22
todo/views/import_csv.py Normal file
View file

@ -0,0 +1,22 @@
from django.contrib.auth.decorators import login_required, user_passes_test
from django.http import HttpResponse
from django.shortcuts import render
from todo.operations.csv_importer import CSVImporter
from todo.utils import staff_check
@login_required
@user_passes_test(staff_check)
def import_csv(request) -> HttpResponse:
    """Import a specifically formatted CSV into stored tasks.

    GET renders the upload form. POST hands the uploaded `csvfile` to
    `CSVImporter` and renders its report (summaries, upserts, errors) above
    the form. A POST without a file renders a one-line notice instead of
    passing `None` to the importer, which cannot iterate it.
    """
    ctx = {}

    if request.method == "POST":
        upload = request.FILES.get("csvfile")

        if upload is None:
            # Same shape as the importer's report so the template needs no special case.
            ctx["results"] = {
                "summaries": ["No CSV file was uploaded. Please choose a file and try again."],
                "upserts": [],
                "errors": [],
            }
        else:
            importer = CSVImporter()
            ctx["results"] = importer.upsert(upload)

    return render(request, "todo/import_csv.html", context=ctx)