Skip to content

Commit

Permalink
refactor: generate I-D bibxml files via celery (#7426)
Browse files Browse the repository at this point in the history
* refactor: task to generate_draft_bibxml_files

* test: test task/utility methods

* chore: add periodic task

* chore: remove generate_draft_bibxml_files.py

* chore: further prune /bin/hourly
  • Loading branch information
jennifer-richards committed May 16, 2024
1 parent de8b3b5 commit ffb9eb1
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 108 deletions.
18 changes: 0 additions & 18 deletions bin/hourly
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,15 @@
# This script is expected to be triggered by cron from
# /etc/cron.d/datatracker
export LANG=en_US.UTF-8
export PYTHONIOENCODING=utf-8

# Make sure we stop if something goes wrong:
program=${0##*/}
trap 'echo "$program($LINENO): Command failed with error code $? ([$$] $0 $*)"; exit 1' ERR

DTDIR=/a/www/ietf-datatracker/web
cd $DTDIR/

# Set up the virtual environment
source $DTDIR/env/bin/activate

logger -p user.info -t cron "Running $DTDIR/bin/hourly"

# Generate some static files
ID=/a/ietfdata/doc/draft/repository
DERIVED=/a/ietfdata/derived
DOWNLOAD=/a/www/www6s/download

CHARTER=/a/www/ietf-ftp/charter
wget -q https://datatracker.ietf.org/wg/1wg-charters-by-acronym.txt -O $CHARTER/1wg-charters-by-acronym.txt
wget -q https://datatracker.ietf.org/wg/1wg-charters.txt -O $CHARTER/1wg-charters.txt

# Regenerate the last week of bibxml-ids
$DTDIR/ietf/manage.py generate_draft_bibxml_files

# Create and update group wikis
#$DTDIR/ietf/manage.py create_group_wikis

# exit 0
84 changes: 0 additions & 84 deletions ietf/doc/management/commands/generate_draft_bibxml_files.py

This file was deleted.

31 changes: 29 additions & 2 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path

from django.conf import settings
from django.utils import timezone

from ietf.utils import log
from ietf.utils.timezone import datetime_today
Expand All @@ -24,8 +25,13 @@
send_expire_warning_for_draft,
)
from .lastcall import get_expired_last_calls, expire_last_call
from .models import Document
from .utils import generate_idnits2_rfc_status, generate_idnits2_rfcs_obsoleted
from .models import Document, NewRevisionDocEvent
from .utils import (
generate_idnits2_rfc_status,
generate_idnits2_rfcs_obsoleted,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
)


@shared_task
Expand Down Expand Up @@ -90,3 +96,24 @@ def generate_idnits2_rfcs_obsoleted_task():
outpath.write_text(blob, encoding="utf8")
except Exception as e:
log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")


@shared_task
def generate_draft_bibxml_files_task(days=7, process_all=False):
"""Generate bibxml files for recently updated docs
If process_all is False (the default), processes only docs with new revisions
in the last specified number of days.
"""
ensure_draft_bibxml_path_exists()
doc_events = NewRevisionDocEvent.objects.filter(
type="new_revision",
doc__type_id="draft",
).order_by("time")
if not process_all:
doc_events = doc_events.filter(time__gte=timezone.now() - datetime.timedelta(days=days))
for event in doc_events:
try:
update_or_create_draft_bibxml_file(event.doc, event.rev)
except Exception as err:
log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
90 changes: 88 additions & 2 deletions ietf/doc/tests_tasks.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# Copyright The IETF Trust 2024, All Rights Reserved
import datetime
import mock

from pathlib import Path

from django.conf import settings
from django.utils import timezone

from ietf.utils.test_utils import TestCase
from ietf.utils.timezone import datetime_today

from .factories import DocumentFactory
from .models import Document
from .factories import DocumentFactory, NewRevisionDocEventFactory
from .models import Document, NewRevisionDocEvent
from .tasks import (
expire_ids_task,
expire_last_calls_task,
generate_draft_bibxml_files_task,
generate_idnits2_rfcs_obsoleted_task,
generate_idnits2_rfc_status_task,
notify_expirations_task,
Expand Down Expand Up @@ -114,3 +117,86 @@ def test_generate_idnits2_rfcs_obsoleted_task(self, mock_generate):
"dåtå".encode("utf8"),
(Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted").read_bytes(),
)

@mock.patch("ietf.doc.tasks.ensure_draft_bibxml_path_exists")
@mock.patch("ietf.doc.tasks.update_or_create_draft_bibxml_file")
def test_generate_draft_bibxml_files_task(self, mock_create, mock_ensure_path):
now = timezone.now()
very_old_event = NewRevisionDocEventFactory(
time=now - datetime.timedelta(days=1000), rev="17"
)
old_event = NewRevisionDocEventFactory(
time=now - datetime.timedelta(days=8), rev="03"
)
young_event = NewRevisionDocEventFactory(
time=now - datetime.timedelta(days=6), rev="06"
)
# a couple that should always be ignored
NewRevisionDocEventFactory(
time=now - datetime.timedelta(days=6), rev="09", doc__type_id="rfc" # not a draft
)
NewRevisionDocEventFactory(
type="changed_document", # not a "new_revision" type
time=now - datetime.timedelta(days=6),
rev="09",
doc__type_id="rfc",
)

# Get rid of the "00" events created by the factories -- they're just noise for this test
NewRevisionDocEvent.objects.filter(rev="00").delete()

# default args - look back 7 days
generate_draft_bibxml_files_task()
self.assertTrue(mock_ensure_path.called)
self.assertCountEqual(
mock_create.call_args_list, [mock.call(young_event.doc, young_event.rev)]
)
mock_create.reset_mock()
mock_ensure_path.reset_mock()

# shorter lookback
generate_draft_bibxml_files_task(days=5)
self.assertTrue(mock_ensure_path.called)
self.assertCountEqual(mock_create.call_args_list, [])
mock_create.reset_mock()
mock_ensure_path.reset_mock()

# longer lookback
generate_draft_bibxml_files_task(days=9)
self.assertTrue(mock_ensure_path.called)
self.assertCountEqual(
mock_create.call_args_list,
[
mock.call(young_event.doc, young_event.rev),
mock.call(old_event.doc, old_event.rev),
],
)
mock_create.reset_mock()
mock_ensure_path.reset_mock()

# everything
generate_draft_bibxml_files_task(process_all=True)
self.assertTrue(mock_ensure_path.called)
self.assertCountEqual(
mock_create.call_args_list,
[
mock.call(young_event.doc, young_event.rev),
mock.call(old_event.doc, old_event.rev),
mock.call(very_old_event.doc, very_old_event.rev),
],
)
mock_create.reset_mock()
mock_ensure_path.reset_mock()

# everything should still be tried, even if there's an exception
mock_create.side_effect = RuntimeError
generate_draft_bibxml_files_task(process_all=True)
self.assertTrue(mock_ensure_path.called)
self.assertCountEqual(
mock_create.call_args_list,
[
mock.call(young_event.doc, young_event.rev),
mock.call(old_event.doc, old_event.rev),
mock.call(very_old_event.doc, very_old_event.rev),
],
)
53 changes: 51 additions & 2 deletions ietf/doc/tests_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import datetime
import debug # pyflakes:ignore

from unittest.mock import patch
from pathlib import Path
from unittest.mock import call, patch

from django.conf import settings
from django.db import IntegrityError
from django.test.utils import override_settings
from django.utils import timezone
Expand All @@ -16,7 +18,8 @@
from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor
from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
fuzzy_find_documents, rebuild_reference_relations, build_file_urls)
fuzzy_find_documents, rebuild_reference_relations, build_file_urls,
ensure_draft_bibxml_path_exists, update_or_create_draft_bibxml_file)
from ietf.utils.draft import Draft, PlaintextDraft
from ietf.utils.xmldraft import XMLDraft

Expand Down Expand Up @@ -484,3 +487,49 @@ def test_xml_and_plaintext(self, mock_init, mock_get_refs, mock_plaintext_init):
(self.updated.name, 'updates'),
]
)


class DraftBibxmlTests(TestCase):
settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ["BIBXML_BASE_PATH"]

def test_ensure_draft_bibxml_path_exists(self):
expected = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
self.assertFalse(expected.exists())
ensure_draft_bibxml_path_exists()
self.assertTrue(expected.is_dir()) # false if does not exist or is not dir

@patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
def test_create_draft_bibxml_file(self, mock):
bibxml_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
bibxml_path.mkdir(exist_ok=False) # expect to start with a clean slate

doc = DocumentFactory()
ref_path = bibxml_path / f"reference.I-D.{doc.name}-26.xml" # we're pretending it's rev 26

update_or_create_draft_bibxml_file(doc, "26")
self.assertEqual(mock.call_count, 1)
self.assertEqual(mock.call_args, call(doc, "26"))
self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")

@patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
def test_update_draft_bibxml_file(self, mock):
bibxml_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
bibxml_path.mkdir(exist_ok=False) # expect to start with a clean slate

doc = DocumentFactory()
ref_path = bibxml_path / f"reference.I-D.{doc.name}-26.xml" # we're pretending it's rev 26
ref_path.write_text("Old data")

# should replace it
update_or_create_draft_bibxml_file(doc, "26")
self.assertEqual(mock.call_count, 1)
self.assertEqual(mock.call_args, call(doc, "26"))
self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")

# should leave it alone if it differs only by leading/trailing whitespace
mock.reset_mock()
mock.return_value = " \n This\nis\nmy\nbibxml "
update_or_create_draft_bibxml_file(doc, "26")
self.assertEqual(mock.call_count, 1)
self.assertEqual(mock.call_args, call(doc, "26"))
self.assertEqual(ref_path.read_text(), "This\nis\nmy\nbibxml")
17 changes: 17 additions & 0 deletions ietf/doc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1413,3 +1413,20 @@ def investigate_fragment(name_fragment):
unverifiable_collections=unverifiable_collections,
unexpected=unexpected,
)


def update_or_create_draft_bibxml_file(doc, rev):
log.assertion("doc.type_id == 'draft'")
normalized_bibxml = re.sub(r"\r\n?", r"\n", bibxml_for_draft(doc, rev))
ref_rev_file_path = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids" / f"reference.I-D.{doc.name}-{rev}.xml"
try:
existing_bibxml = ref_rev_file_path.read_text(encoding="utf8")
except IOError:
existing_bibxml = ""
if normalized_bibxml.strip() != existing_bibxml.strip():
log.log(f"Writing {ref_rev_file_path}")
ref_rev_file_path.write_text(normalized_bibxml, encoding="utf8")


def ensure_draft_bibxml_path_exists():
(Path(settings.BIBXML_BASE_PATH) / "bibxml-ids").mkdir(exist_ok=True)
10 changes: 10 additions & 0 deletions ietf/utils/management/commands/periodic_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ def create_default_tasks(self):
),
)

PeriodicTask.objects.get_or_create(
name="Generate I-D bibxml files",
task="ietf.doc.tasks.generate_draft_bibxml_files_task",
defaults=dict(
enabled=False,
crontab=self.crontabs["hourly"],
description="Generate draft bibxml files for the last week's drafts",
),
)

def show_tasks(self):
for label, crontab in self.crontabs.items():
tasks = PeriodicTask.objects.filter(crontab=crontab).order_by(
Expand Down

0 comments on commit ffb9eb1

Please sign in to comment.