phase 5 dynamic alert registry (bot canonical, settings-site with fallback)
This commit is contained in:
201
services/notificationRegistry.js
Normal file
201
services/notificationRegistry.js
Normal file
@@ -0,0 +1,201 @@
|
||||
/**
 * Canonical registry of notification alerts.
 *
 * This is the single source of truth for the 30 standard-threshold-driven
 * alert keys shared by surgeChecker, patternChecker, and staffNotifications.
 * Consumers:
 *   - the three checker services (drift check at startup)
 *   - routes/internalApi.js, GET /notifications/alerts
 *   - the settings-site UI (proxied /api/notifications/alerts, with fallback)
 *
 * Deliberately absent (the UI treats these as fallback-only):
 *   - rapid_t2_t3: fires on count milestones, not through shouldFire()
 *   - chat_messages/time: owned by chatAlertChecker.js, outside Phase 5 scope
 *
 * Every entry is a frozen `{ key, description, windowType }` record, and the
 * group arrays and the registry object itself are frozen as well.
 * `windowType` is the reset window shouldFire() uses for pattern keys
 * ('today', 'week' or 'month'). Surge and unclaimed alerts fire on an
 * escalating cooldown rather than a window, so their windowType is null.
 */
const REGISTRY = (() => {
  // Builds one immutable entry. Property order is key, description, windowType.
  const defineAlert = (key, windowType, description) =>
    Object.freeze({ key, description, windowType });

  // Queue-pressure alerts; cooldown-escalating, hence no reset window.
  const surge = Object.freeze([
    defineAlert(
      'surge_tickets',
      null,
      'Fires when total active ticket volume exceeds configured surge thresholds, signaling broad queue pressure that needs staffing attention.'
    ),
    defineAlert(
      'surge_game',
      null,
      'Fires when one game accumulates tickets unusually fast within the configured window, indicating a localized incident that should be triaged.'
    ),
    defineAlert(
      'surge_stale',
      null,
      'Fires when too many tickets stay unresolved past the stale-time threshold, prompting staff to clear aging backlog.'
    ),
    defineAlert(
      'surge_needs_response',
      null,
      'Fires when tickets needing a staff reply exceed count and age limits, indicating response latency is building.'
    ),
    defineAlert(
      'surge_unclaimed',
      null,
      'Fires when the unclaimed queue crosses configured count/age thresholds, signaling ownership gaps that need pickup.'
    ),
    defineAlert(
      'surge_tier3_unclaimed',
      null,
      "Fires when Tier 3 tickets have been sitting unclaimed past each threshold. Escalating intervals prevent spam while ensuring critical tickets don't go unnoticed."
    ),
    defineAlert(
      'surge_no_staff',
      null,
      'Fires when open-ticket load is high while no staff are detected as available, prompting immediate coverage.'
    ),
  ]);

  // Pattern alerts; each one names the window after which its counters reset.
  const patterns = Object.freeze([
    defineAlert(
      'user_tickets',
      'today',
      'Detects users opening unusually high ticket counts in the active window, suggesting repeat-issue or abuse patterns.'
    ),
    defineAlert(
      'user_reopen',
      'week',
      'Detects users who repeatedly reopen or recreate issues after closure, signaling unresolved root-cause patterns.'
    ),
    defineAlert(
      'user_crossgame',
      'week',
      'Detects users reporting similar issues across multiple games in a short period, indicating broader account-level impact.'
    ),
    defineAlert(
      'game_surge',
      'today',
      'Detects game-specific ticket spikes crossing thresholds in the pattern window, signaling service instability for that title.'
    ),
    defineAlert(
      'game_backlog',
      'today',
      'Detects games accumulating unresolved backlog above threshold, implying triage capacity is lagging for that queue.'
    ),
    defineAlert(
      'game_resolution',
      'week',
      'Detects unusual drops in resolution rate for a game, indicating tickets are staying open longer than expected.'
    ),
    defineAlert(
      'game_spike',
      'today',
      'Detects abrupt short-window jumps in ticket volume for a game, flagging incidents that may need escalation.'
    ),
    defineAlert(
      'tag_top',
      'today',
      'Detects tag frequency leaders above threshold so recurring issue types can be prioritized for fixes or macros.'
    ),
    defineAlert(
      'tag_escalation',
      'week',
      'Detects tags with unusually high escalation rates, indicating categories that routinely require higher-tier handling.'
    ),
    defineAlert(
      'untagged_closes',
      'today',
      'Detects elevated counts of closed tickets without tags, prompting cleanup to preserve reporting quality.'
    ),
    defineAlert(
      'tag_game_corr',
      'week',
      'Detects strong tag-to-game concentration patterns, highlighting issue types tightly linked to specific games.'
    ),
    defineAlert(
      'user_esc',
      'week',
      'Detects users whose tickets escalate unusually often, implying complex cases that may need proactive follow-up.'
    ),
    defineAlert(
      'game_esc_rate',
      'week',
      'Detects games with escalating ticket-rate thresholds exceeded, signaling deeper technical issues for that title.'
    ),
    defineAlert(
      'staff_no_close',
      'today',
      'Detects staff with prolonged periods of claims but few closes, suggesting overloaded ownership or stuck work.'
    ),
    defineAlert(
      'staff_overloaded',
      'today',
      'Detects staff carrying ticket loads beyond threshold, indicating balancing or reassignment may be needed.'
    ),
    defineAlert(
      'staff_stale',
      'today',
      'Detects staff-owned tickets aging beyond stale limits, prompting review and unblock actions.'
    ),
    defineAlert(
      'staff_transfer_rate',
      'today',
      'Detects unusually high transfer/reassignment rates by staff, signaling ownership churn that may hurt throughput.'
    ),
    defineAlert(
      'staff_esc',
      'week',
      'Detects staff escalation counts above threshold, highlighting where extra support or training may be needed.'
    ),
    defineAlert(
      'staff_game_esc',
      'week',
      'Detects high escalation concentration for specific staff/game combinations, indicating targeted expertise gaps.'
    ),
    defineAlert(
      'game_tag_spike',
      'today',
      'Detects sudden spikes of specific tags within a game, flagging focused incident signatures.'
    ),
    defineAlert(
      'overnight_gap',
      'week',
      'Detects recurring unattended overnight windows with active demand, suggesting staffing coverage gaps.'
    ),
    defineAlert(
      'staff_always_esc',
      'month',
      'Detects staff whose handled tickets escalate at consistently high rates, implying sustained tier-fit issues.'
    ),
  ]);

  // Per-ticket unclaimed reminders; cooldown-escalating, hence no reset window.
  const unclaimed = Object.freeze([
    defineAlert(
      'unclaimed_reminder',
      null,
      'Reminds all staff notification channels about unclaimed tickets. Thresholds are per-ticket age — each threshold fires once per ticket and resets on escalation.'
    ),
  ]);

  return Object.freeze({ surge, patterns, unclaimed });
})();
|
||||
|
||||
// Flat, frozen list of every registered alert key: surge keys first, then
// pattern keys, then unclaimed keys, each in registry order.
const ALL_KEYS = Object.freeze(
  [REGISTRY.surge, REGISTRY.patterns, REGISTRY.unclaimed].flatMap((group) =>
    group.map(({ key }) => key)
  )
);

// Set view of ALL_KEYS for constant-time membership checks.
const ALL_KEYS_SET = new Set(ALL_KEYS);
|
||||
|
||||
/**
 * Verifies that every key a module uses exists in the registry.
 *
 * Intended to be called at module load by each checker that references
 * registry keys, so that drift between a checker and the registry fails fast.
 *
 * @param {string} moduleName - Name of the calling module, used in the error message.
 * @param {string[]} keys - Alert keys the calling module references.
 * @returns {void}
 * @throws {Error} When at least one key is not registered; the message lists
 *   every offending key, comma-separated, in the order given.
 */
function assertKeysRegistered(moduleName, keys) {
  const isUnregistered = (candidate) => ALL_KEYS_SET.has(candidate) === false;
  const unregistered = keys.filter(isUnregistered);

  // Nothing unknown: the caller is in sync with the registry.
  if (unregistered.length === 0) {
    return;
  }

  const message = `[notificationRegistry] ${moduleName} references keys not in REGISTRY: ${unregistered.join(', ')}`;
  throw new Error(message);
}
|
||||
|
||||
// Exported surface of the registry module. ALL_KEYS_SET is not exported; in
// this file it is only read by assertKeysRegistered.
module.exports = { REGISTRY, ALL_KEYS, assertKeysRegistered };
|
||||
@@ -7,6 +7,22 @@ const { CONFIG, parseThresholdString } = require('../config');
|
||||
const { mongoose } = require('../db-connection');
|
||||
const { getAll, get, shouldFireThreshold, onWeeklyReset } = require('./patternStore');
|
||||
const { enqueueSend } = require('./channelQueue');
|
||||
const { assertKeysRegistered } = require('./notificationRegistry');
|
||||
|
||||
// Keys that patternChecker fires through the shouldFire()/standard-threshold
// path. rapid_t2_t3 is deliberately left out: it fires on count milestones
// (tracked via firedCountMilestones further down) instead of going through
// shouldFire(), so it has no entry in the notification registry.
const PATTERN_ALERT_KEYS = [
  // per-user patterns
  'user_tickets',
  'user_reopen',
  'user_crossgame',
  // per-game patterns
  'game_surge',
  'game_backlog',
  'game_resolution',
  'game_spike',
  // tag patterns
  'tag_top',
  'tag_escalation',
  'untagged_closes',
  'tag_game_corr',
  // escalation patterns
  'user_esc',
  'game_esc_rate',
  // per-staff patterns
  'staff_no_close',
  'staff_overloaded',
  'staff_stale',
  'staff_transfer_rate',
  'staff_esc',
  'staff_game_esc',
  // remaining patterns
  'game_tag_spike',
  'overnight_gap',
  'staff_always_esc',
];

// Throws at module load if any key above is missing from the registry.
assertKeysRegistered('patternChecker', PATTERN_ALERT_KEYS);
|
||||
|
||||
const Ticket = mongoose.model('Ticket');
|
||||
|
||||
|
||||
@@ -12,6 +12,11 @@ const { mongoose } = require('../db-connection');
|
||||
const { CONFIG, parseThresholdString } = require('../config');
|
||||
const { increment } = require('./patternStore');
|
||||
const { enqueueSend } = require('./channelQueue');
|
||||
const { assertKeysRegistered } = require('./notificationRegistry');
|
||||
|
||||
// The one alert key staffNotifications drives. It is checked against the
// registry at load time so that drift fails fast.
const UNCLAIMED_ALERT_KEYS = [
  'unclaimed_reminder',
];

assertKeysRegistered('staffNotifications', UNCLAIMED_ALERT_KEYS);
|
||||
|
||||
const Ticket = mongoose.model('Ticket');
|
||||
const StaffNotification = mongoose.model('StaffNotification');
|
||||
|
||||
@@ -8,6 +8,21 @@ const { mongoose } = require('../db-connection');
|
||||
const { shouldFireCooldownEscalating, clearEscalating, isStaffRecentlyActive } = require('./patternStore');
|
||||
const { getStaffAvailability, isAnyStaffAvailable } = require('./staffPresence');
|
||||
const { enqueueSend } = require('./channelQueue');
|
||||
const { assertKeysRegistered } = require('./notificationRegistry');
|
||||
|
||||
// Every alert key surgeChecker drives. The list is validated against the
// registry when this module loads, so a rename, typo, or unregistered key
// fails fast instead of silently breaking the settings-site config editor.
const SURGE_ALERT_KEYS = [
  'surge_tickets', 'surge_game', 'surge_stale', 'surge_needs_response',
  'surge_unclaimed', 'surge_tier3_unclaimed', 'surge_no_staff',
];

assertKeysRegistered('surgeChecker', SURGE_ALERT_KEYS);
|
||||
|
||||
const Ticket = mongoose.model('Ticket');
|
||||
|
||||
|
||||
Reference in New Issue
Block a user