feat: redact IPv4 and IPv6 addresses from PZ log content

Adds a fourth pass to ProjectZomboidRedactor that scrubs IPv4
(strict 0-255 octets, optional :port suffix) and IPv6 (full,
abbreviated, bracketed-with-port, IPv4-mapped) addresses, replacing
them with the literal [REDACTED_IP]. The new pass runs first
because it is pattern-disjoint from the Steam-ID -> name -> coords
chain. A single redactIpAddresses(bool) toggle controls both
families; the existing toggles are unchanged. Strict regexes plus
filter_var() validation prevent false positives on PZ timestamps
(12:00:00.000) and PHP/Java scope ops (Foo::bar). 20 new tests
cover bare/with-port/multiple/loopback/boundary IPv4, full /
abbreviated / bracketed / IPv4-mapped IPv6, scope-op rejection,
timestamp rejection, Steam-ID non-collision, toggle-off, and
idempotence.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 16:31:10 +00:00
parent b6949ff0c3
commit 3df6836909
3 changed files with 318 additions and 7 deletions

View File

@@ -7,15 +7,24 @@ use IndifferentKetchup\Codex\Util\RedactorInterface;
/**
* Render-time PII filter for Project Zomboid log content.
*
* Applies up to three sequential regex passes over the raw log string,
* Applies up to four sequential regex passes over the raw log string,
* each controlled by a boolean toggle (all enabled by default):
*
* 1. Steam ID pass — replaces 17-digit Steam IDs with a placeholder token.
* 2. Player name pass — replaces player display names with a placeholder
* 1. IP address pass — replaces IPv4 addresses (with optional :port
* suffix) and IPv6 addresses (full, abbreviated, bracketed, and
* IPv4-mapped forms; for IPv6 a :port suffix is consumed only after
* the bracketed form) with a placeholder token. Pattern-disjoint from
* the other passes.
* 2. Steam ID pass — replaces 17-digit Steam IDs with a placeholder
* token.
* 3. Player name pass — replaces player display names with a placeholder
* token. This pass anchors on the already-redacted Steam ID token, so
* the ordering Steam ID -> name -> coordinates is mandatory.
* 3. Coordinates pass — replaces world coordinate triplets with a placeholder
* token.
* 4. Coordinates pass — replaces world coordinate triplets with a
* placeholder token.
*
* Pass 1 runs first by convention, not dependency: it shares no anchors
* with passes 2-4 and could run anywhere in the chain without affecting
* their output.
*
* All regex passes use the /u flag for Unicode safety.
*
@@ -24,6 +33,29 @@ use IndifferentKetchup\Codex\Util\RedactorInterface;
*/
class ProjectZomboidRedactor implements RedactorInterface
{
/** Generic placeholder substituted for every matched IPv4 or IPv6 address (with port suffix consumed when present). */
public const string IP_REPLACEMENT = '[REDACTED_IP]';
/**
* Strict IPv4 matcher: four dot-separated octets, each limited to 0-255 by
* the alternation (leading zeros such as "01" are accepted), followed by an
* optional ":port" suffix of 1-5 digits. The port is not range-checked.
*
* Boundaries:
* - The leading lookbehinds reject a match that directly follows a letter,
*   digit, underscore or colon, or that follows "<digit>.".
* - The trailing lookaheads reject a match directly followed by a letter,
*   digit, underscore or colon, or by ".<digit>".
* - Together the (?<!\d\.) / (?!\.\d) pair prevents matching inside a
*   longer dotted sequence such as 1.2.3.4.5, while a sentence period after
*   the address or port is still allowed.
*
* NOTE(review): because ":" is part of the leading lookbehind, an address
* glued to a colon (e.g. "ip:10.0.0.1") is not matched by this pattern.
* Confirm that real log lines never use that shape.
*/
public const string IPV4_REGEX = '/'
. '(?<![A-Za-z0-9_:])(?<!\d\.)'
. '(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
. '(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'
. '(?::\d{1,5})?'
. '(?![A-Za-z0-9_:])(?!\.\d)'
. '/u';
/**
* Coarse IPv6 candidate matcher. Every match is validated with filter_var()
* in the redact() callback, so look-alikes such as PHP/Java scope operators
* (Foo::Bar) and PZ timestamps (12:00:00.000) are left untouched.
*
* Two alternatives are recognised:
* - bracketed: "[addr]" with an optional ":port" suffix of 1-5 digits; the
*   text between the brackets is captured as "bracketed".
* - bare: 2-7 colon-terminated groups of up to four hex digits, followed by
*   at most one final hex group or an embedded dotted quad (IPv4-mapped
*   form); captured as "bare".
*
* The bare tail deliberately cannot end in a dot. A free-form hex-or-dot
* tail would swallow a sentence-ending period ("peer 2001:db8::1."), the
* candidate would then fail filter_var(), and the address would be left
* unredacted. With the structured tail the period stays outside the match
* and is only inspected by the trailing lookaheads.
*
* Boundary lookarounds mirror the IPv4 regex: no letter, digit, underscore
* or colon directly before or after the match, and no "<digit>." before or
* ".<digit>" after it.
*/
public const string IPV6_REGEX = '/'
. '(?<![A-Za-z0-9_:])(?<!\d\.)'
. '(?:'
. '\[(?<bracketed>[0-9a-fA-F:.]+)\](?::\d{1,5})?'
. '|'
. '(?<bare>(?:[0-9a-fA-F]{0,4}:){2,7}(?:[0-9]{1,3}(?:\.[0-9]{1,3}){3}|[0-9a-fA-F]{1,4})?)'
. ')'
. '(?![A-Za-z0-9_:])(?!\.\d)'
. '/u';
/** Regex matching a 17-digit SteamID64 anchored on the 76561198 universe prefix, with lookaround boundaries that reject embedded occurrences. */
public const string STEAM_ID_REGEX = '/(?<![A-Za-z0-9])76561198\d{9}(?![A-Za-z0-9])/u';
@@ -54,10 +86,23 @@ class ProjectZomboidRedactor implements RedactorInterface
/** Matches integer coordinate triplets enclosed in round parentheses, anchored on a trailing PvP verb to disambiguate from server-metadata triples (pvp.txt Combat:/Safety: shape); only the attacker/first-coord set is redacted per line — the victim coords lack the trailing keyword and are deferred to v2. */
public const string COORDS_PARENTHESISED_REGEX = '/(?<=\()(?<x>\d+),(?<y>\d+),(?<z>-?\d+)(?=\) (?:hit|restore|store|true|false))/u';
/** Gates the combined IPv6 + IPv4 replacement step in redact(); enabled by default. */
private bool $redactIpAddresses = true;
/** Gates the Steam ID replacement step in redact(); enabled by default. */
private bool $redactSteamIds = true;
/** Toggle for the player name pass; enabled by default. */
private bool $redactPlayerNames = true;
/** Toggle for the coordinates pass; enabled by default. */
private bool $redactCoordinates = true;
/**
* Enable or disable the IP address redaction pass.
*
* A single flag governs both address families: redact() runs the IPv6 and
* the IPv4 replacement only while this flag is true.
*
* @param bool $on Pass true to enable, false to disable.
* @return static The same instance, so toggle calls can be chained.
*/
public function redactIpAddresses(bool $on): static
{
$this->redactIpAddresses = $on;
return $this;
}
/**
* Enable or disable the Steam ID redaction pass.
*
@@ -97,14 +142,31 @@ class ProjectZomboidRedactor implements RedactorInterface
/**
* Redact PII from the given Project Zomboid log content.
*
* Passes are applied in the mandatory order: Steam ID -> player name ->
* coordinates. See class docblock for rationale.
* Passes are applied in the order: IP address -> Steam ID -> player
* name -> coordinates. The Steam ID -> name -> coordinates ordering
* is mandatory (see class docblock); the IP pass is pattern-disjoint
* and runs first by convention.
*
* @param string $content Raw log content that may contain PII.
* @return string Content with enabled PII categories replaced by tokens.
*/
public function redact(string $content): string
{
if ($this->redactIpAddresses) {
$content = preg_replace_callback(
self::IPV6_REGEX,
static function (array $matches): string {
$candidate = ($matches['bracketed'] ?? '') !== ''
? $matches['bracketed']
: ($matches['bare'] ?? '');
return filter_var($candidate, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false
? self::IP_REPLACEMENT
: $matches[0];
},
$content
);
$content = preg_replace(self::IPV4_REGEX, self::IP_REPLACEMENT, $content);
}
if ($this->redactSteamIds) {
$content = preg_replace(self::STEAM_ID_REGEX, self::STEAM_ID_REPLACEMENT, $content);
}

View File

@@ -0,0 +1,114 @@
<?php
namespace IndifferentKetchup\Codex\Test\Tests\Util\Redactor;
use IndifferentKetchup\Codex\Util\ProjectZomboid\ProjectZomboidRedactor;
use PHPUnit\Framework\TestCase;
class ProjectZomboidRedactorIpv4Test extends TestCase
{
    /**
     * A lone dotted quad inside a sentence is swapped for the placeholder.
     */
    public function testRedactsBareIpv4(): void
    {
        self::assertSame(
            'Connection from [REDACTED_IP] closed.',
            $this->scrub('Connection from 192.168.1.1 closed.'),
        );
    }

    /**
     * The ":port" suffix disappears together with the address, while the
     * sentence period that follows it is kept.
     */
    public function testRedactsIpv4WithPortSuffix(): void
    {
        self::assertSame(
            'Connected to [REDACTED_IP].',
            $this->scrub('Connected to 10.0.0.42:27015.'),
        );
    }

    /**
     * Every address on a line is replaced, not only the first one.
     */
    public function testRedactsMultipleIpv4OnOneLine(): void
    {
        self::assertSame(
            'Peer [REDACTED_IP] -> [REDACTED_IP] via [REDACTED_IP].',
            $this->scrub('Peer 192.168.1.10 -> 192.168.1.20 via 10.0.0.1:8080.'),
        );
    }

    /**
     * Loopback plus the lowest and highest representable addresses, one per
     * line, each collapse into a placeholder.
     */
    public function testRedactsLoopbackAndBoundaryAddresses(): void
    {
        $addresses = ['127.0.0.1', '0.0.0.0', '255.255.255.255'];
        $placeholders = array_fill(0, count($addresses), '[REDACTED_IP]');

        self::assertSame(
            implode("\n", $placeholders),
            $this->scrub(implode("\n", $addresses)),
        );
    }

    /**
     * Octets above 255 do not form an address, so the text is returned as-is.
     */
    public function testDoesNotRedactOutOfRangeOctets(): void
    {
        $line = 'Bogus: 999.999.999.999';

        self::assertSame($line, $this->scrub($line));
    }

    /**
     * A five-segment dotted sequence is not an IPv4 address; no four-segment
     * slice of it may be replaced.
     */
    public function testDoesNotRedactInsideLongerDottedSequence(): void
    {
        $line = 'Path frag 1.2.3.4.5 should not match.';

        self::assertSame($line, $this->scrub($line));
    }

    /**
     * Three-segment PZ build numbers (e.g. 41.78.16) are left alone.
     */
    public function testDoesNotRedactThreeSegmentBuildNumbers(): void
    {
        $line = 'Build 41.78.16 starting up.';

        self::assertSame($line, $this->scrub($line));
    }

    /**
     * With the IP toggle switched off the address and its port survive.
     */
    public function testToggleOffLeavesIpv4Intact(): void
    {
        $line = 'Connection from 192.168.1.1:27015 closed.';
        $redactor = (new ProjectZomboidRedactor())->redactIpAddresses(false);

        self::assertSame($line, $redactor->redact($line));
    }

    /**
     * Feeding redacted output back through the same redactor changes nothing.
     */
    public function testIdempotence(): void
    {
        $redactor = new ProjectZomboidRedactor();
        $raw = implode("\n", [
            'Connection from 192.168.1.1:27015 closed.',
            'Peer 10.0.0.42 -> 10.0.0.43 via 172.16.0.1:8080.',
        ]);

        $firstPass = $redactor->redact($raw);

        self::assertSame($firstPass, $redactor->redact($firstPass));
    }

    /**
     * Run the given text through a freshly constructed redactor with its
     * default toggles.
     */
    private function scrub(string $raw): string
    {
        return (new ProjectZomboidRedactor())->redact($raw);
    }
}

View File

@@ -0,0 +1,135 @@
<?php
namespace IndifferentKetchup\Codex\Test\Tests\Util\Redactor;
use IndifferentKetchup\Codex\Util\ProjectZomboid\ProjectZomboidRedactor;
use PHPUnit\Framework\TestCase;
class ProjectZomboidRedactorIpv6Test extends TestCase
{
    /**
     * The full eight-group notation is replaced as one token.
     */
    public function testRedactsFullIpv6(): void
    {
        self::assertSame(
            'Bound [REDACTED_IP] ok.',
            $this->scrub('Bound 2001:0db8:85a3:0000:0000:8a2e:0370:7334 ok.'),
        );
    }

    /**
     * The "::" zero-compressed notation is replaced as one token.
     */
    public function testRedactsAbbreviatedIpv6(): void
    {
        self::assertSame(
            'Server peer [REDACTED_IP] connected.',
            $this->scrub('Server peer 2001:db8::1 connected.'),
        );
    }

    /**
     * The bare loopback address "::1" is replaced.
     */
    public function testRedactsLoopbackIpv6(): void
    {
        self::assertSame(
            'localhost [REDACTED_IP] reachable.',
            $this->scrub('localhost ::1 reachable.'),
        );
    }

    /**
     * A bracketed address and its ":port" suffix collapse into a single
     * placeholder.
     */
    public function testRedactsBracketedIpv6WithPort(): void
    {
        self::assertSame(
            'Bound to [REDACTED_IP] ok.',
            $this->scrub('Bound to [2001:db8::1]:8080 ok.'),
        );
    }

    /**
     * Bracketed loopback with a port, directly followed by a sentence period:
     * the period is kept.
     */
    public function testRedactsBracketedLoopbackWithPort(): void
    {
        self::assertSame(
            'Listening on [REDACTED_IP].',
            $this->scrub('Listening on [::1]:27015.'),
        );
    }

    /**
     * The IPv4-mapped form must come out as exactly one placeholder, with no
     * orphaned "::ffff:" prefix left in front of it.
     */
    public function testRedactsIpv4MappedIpv6(): void
    {
        self::assertSame(
            'Mapped [REDACTED_IP] ok.',
            $this->scrub('Mapped ::ffff:192.168.1.1 ok.'),
        );
    }

    /**
     * Scope operators and method references ("Foo::bar") resemble a
     * leading-"::" address but must pass through unchanged.
     */
    public function testDoesNotRedactJavaScopeOperator(): void
    {
        $line = 'Foo::bar called Object::toString.';

        self::assertSame($line, $this->scrub($line));
    }

    /**
     * A PZ log prefix with an hh:mm:ss.mmm timestamp must pass through
     * unchanged.
     */
    public function testDoesNotRedactTimestampShape(): void
    {
        $line = '[16-04-26 12:00:00.000][LOG] startup complete';

        self::assertSame($line, $this->scrub($line));
    }

    /**
     * Guard against the IP pass colliding with the Steam ID pass: a Steam ID
     * has no colons or brackets, so the output must contain no IP placeholder.
     * The expected string pins what the default passes produce for this line
     * (presumably the Steam ID placeholder — confirm against
     * STEAM_ID_REPLACEMENT).
     */
    public function testDoesNotRedactSteamIdAsIpv6(): void
    {
        self::assertSame(
            'Player 76561198000000000 joined.',
            $this->scrub('Player 76561198111111111 joined.'),
        );
    }

    /**
     * With the IP toggle switched off the bracketed address and its port
     * survive.
     */
    public function testToggleOffLeavesIpv6Intact(): void
    {
        $line = 'Bound to [2001:db8::1]:8080 ok.';
        $redactor = (new ProjectZomboidRedactor())->redactIpAddresses(false);

        self::assertSame($line, $redactor->redact($line));
    }

    /**
     * Feeding redacted output back through the same redactor changes nothing,
     * including on lines that were never redacted in the first place.
     */
    public function testIdempotence(): void
    {
        $redactor = new ProjectZomboidRedactor();
        $raw = implode("\n", [
            'Server peer 2001:db8::1 connected.',
            'Listening on [::1]:27015.',
            'Mapped ::ffff:192.168.1.1 ok.',
            '[16-04-26 12:00:00.000][LOG] startup complete',
        ]);

        $firstPass = $redactor->redact($raw);

        self::assertSame($firstPass, $redactor->redact($firstPass));
    }

    /**
     * Run the given text through a freshly constructed redactor with its
     * default toggles.
     */
    private function scrub(string $raw): string
    {
        return (new ProjectZomboidRedactor())->redact($raw);
    }
}