Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse all talkpages #71

Merged
merged 2 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ This version of the scripts uses python3, which unfortunately broke our old wiki
- `missing_categories.py`: Searches for non-translated categories. Categories which are only in english should generally be marked as {{non-article category}}.
- `missing_translations.py`: Generates the list of missing translations for each language compared to english, which is used by the translator's noticeboard
- `untranslated_templates.py`: Parses templates for {{lang}} usage, and reports whether or not they are fully translated.
- `active_discussions.py`: Searches for recent edits to talk namespaces, and reports on activity based on the number of editors.

## Weekly reports
- `displaytitles_weekly.py`: Weekly copy of the monthly report which only runs on the past week of recent changes.
Expand Down
73 changes: 73 additions & 0 deletions active_discussions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime, timedelta
from utils import pagescraper_queue, time_and_date
from wikitools import wiki

# Progress-printing flag; flipped to True when the script is run directly.
verbose = False
# Activity windows, computed once at import time. Naive UTC datetimes, which
# matches the naive result of strptime() on MediaWiki API timestamps.
# NOTE(review): datetime.utcnow() is deprecated in 3.12+; migrating to
# datetime.now(UTC) would also require making the parsed revision timestamps
# timezone-aware, or comparisons will raise — confirm before changing.
one_month_ago = datetime.utcnow() - timedelta(days=30)
one_week_ago = datetime.utcnow() - timedelta(days=7)
KNOWN_BOTS = ['WelcomeBOT'] # We only need to list bots which post to talkpages.

def pagescraper(page, active_one_week, active_one_month):
    """Classify a talk page's recent activity and file it into a bucket.

    Fetches the page's revisions from the past month, counts distinct non-bot
    editors, and appends the page to `active_one_week` (any editor in the last
    7 days) or else to `active_one_month` (3+ distinct editors in the last 30
    days). Pages matching neither criterion are dropped.
    """
    if verbose:
        print(f'Fetching revisions for {page}')

    editors_past_week = set()
    editors_past_month = set()
    for rev in page.get_revisions(one_month_ago):
        author = rev['user']
        if author in KNOWN_BOTS:
            continue  # Skip automated posters (e.g. welcome messages).
        when = rev['timestamp']
        if when > one_week_ago:
            editors_past_week.add(author)
        if when > one_month_ago:
            # Weekly editors are monthly editors too; the month set is a superset.
            editors_past_month.add(author)

    # A discussion is 'active' if anyone edited this week, or 3+ distinct
    # editors did within the month. Weekly takes precedence over monthly.
    if editors_past_week:
        active_one_week.append(page)
    elif len(editors_past_month) >= 3:
        active_one_month.append(page)

def main(w):
    """Build the wikitext report of recently-active talk pages on wiki `w`.

    Scans recent changes in every talk namespace over the past month, scrapes
    each touched page's revision history concurrently, and returns the report
    body (DISPLAYTITLE header plus weekly and monthly sections).
    """
    # Every namespace whose name contains 'talk' (Talk:, User talk:, ...).
    talk_namespaces = [ns for ns in w.namespaces if 'talk' in ns.lower()]

    # A set de-duplicates pages edited more than once in the window.
    recent_pages = set(w.get_recent_changes(one_month_ago, namespaces=talk_namespaces))
    if verbose:
        print(f'Found {len(recent_pages)} recently modified talkpages in the past month')

    active_one_week = []
    active_one_month = []
    # The scraper workers append into the two bucket lists as pages complete.
    with pagescraper_queue(pagescraper, active_one_week, active_one_month) as pages:
        for page in recent_pages:
            pages.put(page)
    if verbose:
        print(f'Found {len(active_one_week)} active discussions this week')
        print(f'Found {len(active_one_month)} active discussions this month')

    total_active = len(active_one_week) + len(active_one_month)
    # Doubled braces survive .format() to produce the literal {{...}} wikitext.
    output = """\
{{{{DISPLAYTITLE: {count} active discussions}}}}
There are '''<onlyinclude>{count}</onlyinclude>''' active discussions as of {date}.

""".format(
        count=total_active,
        date=time_and_date())

    for period, bucket in (('week', active_one_week), ('month', active_one_month)):
        output += f'== Active talk pages in the past {period} ==\n'
        for page in sorted(bucket):
            output += f'* [[{page}]]\n'

    return output

# Standalone entry point: generate the report against the live TF2 wiki and
# dump the wikitext to disk (the master.py harness publishes it instead).
if __name__ == '__main__':
    verbose = True  # Module-level flag read by pagescraper() and main().
    w = wiki.Wiki('https://wiki.teamfortress.com/w/api.php')
    # NOTE(review): the output filename 'wiki_all_articles.txt' looks
    # copy-pasted from another report — confirm it is intentional.
    with open('wiki_all_articles.txt', 'w') as f:
        f.write(main(w))
    # f.name remains readable after the with-block; only the handle is closed.
    print(f'Article written to {f.name}')
6 changes: 2 additions & 4 deletions master.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,15 @@
# {{lang}} template mis-ordering and lang-template duplicate keys
# Templates sorted by usage and protect status
# A 'missing translations' report but for dictionary entries (maybe sorted by usage, too?)
# A report for "Edits on talkpages (not in the "user talk" namespace) in the past few days", so people can track active discussions?
# Templates which have redirects in them

# Reports I want to improve:
# update readme (again)
# Consider running some scripts against the Help: namespace, too
# (like what? miscategorized, mismatched, uhhh)
# Sort missing categories by # pages
# Sort the output from mismatched
# Sort the output from displaytitles
# Threading for navboxes.py?
# Ensure that PRs which add files also touch readme.md -> isn't this done?
# Templates which link to redirects

def edit_or_save(page_name, file_name, output, summary):
wiki_diff_url = Page(w, page_name).edit(output, bot=True, summary=summary)
Expand Down Expand Up @@ -68,6 +65,7 @@ def publish_report(w, module, report_name, root, summary):
'missing_categories': 'Untranslated categories',
'missing_translations': 'Missing translations',
'untranslated_templates': 'Untranslated templates',
'active_discussions': 'Active discussions',
}

# English-only but otherwise frequently changing reports
Expand Down
15 changes: 15 additions & 0 deletions wikitools/page.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
from time import sleep
import functools
import requests
Expand Down Expand Up @@ -107,6 +108,20 @@ def get_file_link_count(self):
# Also, this report uses page IDs for iteration, so for now we're returning solely based on the first page of results.
return html.count('mw-whatlinkshere-tools') # Class for (<-- links | edit)

def get_revisions(self, starttime, rvprop='user|timestamp'):
    """Yield this page's revisions, newest first, until one predates `starttime`.

    Pages through the MediaWiki revisions API and converts each revision's
    'timestamp' field from the API string format ('%Y-%m-%dT%H:%M:%SZ') into
    a naive datetime before yielding. Stops (without yielding) at the first
    revision older than `starttime`; since results arrive newest-to-oldest,
    every later revision would be older still.
    """
    batches = self.wiki.get_with_continue('query', 'pages',
        prop='revisions',
        titles=[self.url_title],
        rvprop=rvprop,
        rvlimit=500,
        rvdir='older', # API default: list from newest to oldest.
    )
    for batch in batches:
        for revision in batch['revisions']:
            parsed = datetime.strptime(revision['timestamp'], '%Y-%m-%dT%H:%M:%SZ')
            revision['timestamp'] = parsed
            if parsed < starttime:
                return  # Everything beyond this point is too old; stop paging.
            yield revision

def edit(self, text, summary, bot=True):
if len(text) > 3000 * 1000: # 3 KB
text = '<span class="error">Warning: Report truncated to 3 KB</span>\n' + text[:3000 * 1000]
Expand Down
Loading