feat: redact IPv4 and IPv6 addresses from PZ log content
Adds a fourth pass to ProjectZomboidRedactor that scrubs IPv4 (strict 0-255 octets, optional :port suffix) and IPv6 (full, abbreviated, bracketed-with-port, IPv4-mapped) addresses, replacing them with the literal [REDACTED_IP]. The new pass runs first because it is pattern-disjoint from the Steam-ID -> name -> coords chain. A single redactIpAddresses(bool) toggle controls both families; the existing toggles are unchanged. Strict regexes plus filter_var() validation prevent false positives on PZ timestamps (12:00:00.000) and PHP/Java scope ops (Foo::bar). 20 new tests cover bare/with-port/multiple/loopback/boundary IPv4, full / abbreviated / bracketed / IPv4-mapped IPv6, scope-op rejection, timestamp rejection, Steam-ID non-collision, toggle-off, and idempotence. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,15 +7,24 @@ use IndifferentKetchup\Codex\Util\RedactorInterface;
|
||||
/**
|
||||
* Render-time PII filter for Project Zomboid log content.
|
||||
*
|
||||
* Applies up to three sequential regex passes over the raw log string,
|
||||
* Applies up to four sequential regex passes over the raw log string,
|
||||
* each controlled by a boolean toggle (all enabled by default):
|
||||
*
|
||||
* 1. Steam ID pass — replaces 17-digit Steam IDs with a placeholder token.
|
||||
* 2. Player name pass — replaces player display names with a placeholder
|
||||
* 1. IP address pass — replaces IPv4 addresses (with optional :port
|
||||
* suffix) and IPv6 addresses (full, abbreviated, bracketed, and
|
||||
* IPv4-mapped forms; all with optional :port when bracketed) with
|
||||
* a placeholder token. Pattern-disjoint from the other passes.
|
||||
* 2. Steam ID pass — replaces 17-digit Steam IDs with a placeholder
|
||||
* token.
|
||||
* 3. Player name pass — replaces player display names with a placeholder
|
||||
* token. This pass anchors on the already-redacted Steam ID token, so
|
||||
* the ordering Steam ID -> name -> coordinates is mandatory.
|
||||
* 3. Coordinates pass — replaces world coordinate triplets with a placeholder
|
||||
* token.
|
||||
* 4. Coordinates pass — replaces world coordinate triplets with a
|
||||
* placeholder token.
|
||||
*
|
||||
* Pass 1 runs first by convention, not dependency: it shares no anchors
|
||||
* with passes 2-4 and could run anywhere in the chain without affecting
|
||||
* their output.
|
||||
*
|
||||
* All regex passes use the /u flag for Unicode safety.
|
||||
*
|
||||
@@ -24,6 +33,29 @@ use IndifferentKetchup\Codex\Util\RedactorInterface;
|
||||
*/
|
||||
class ProjectZomboidRedactor implements RedactorInterface
|
||||
{
|
||||
/** Generic placeholder substituted for every matched IPv4 or IPv6 address (with port suffix consumed when present). */
|
||||
public const string IP_REPLACEMENT = '[REDACTED_IP]';
|
||||
|
||||
/**
 * Strict IPv4 matcher: four 0-255 octets with an optional :port suffix
 * (1-5 digits) that is consumed as part of the match.
 *
 * Boundary rules:
 *  - Leading: the match may not start directly after [A-Za-z0-9_:] nor
 *    after "<digit>.", so an address embedded in a longer alphanumeric
 *    or dotted-decimal token (e.g. the tail of 1.2.3.4.5) is rejected.
 *  - Trailing: the match may not be followed by [A-Za-z0-9_], by
 *    ".<digit>" (head of 1.2.3.4.5), or by a colon that is itself
 *    followed by a non-whitespace character. A sentence period, or a
 *    colon followed by whitespace / end of input ("10.0.0.1: timed
 *    out"), does not block the match.
 *
 * NOTE(review): an address written directly after a colon, such as
 * "ip:10.0.0.1", is still rejected by the leading lookbehind — confirm
 * whether that shape occurs in real PZ logs.
 */
public const string IPV4_REGEX = '/'
    . '(?<![A-Za-z0-9_:])(?<!\d\.)'
    . '(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    . '(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'
    . '(?::\d{1,5})?'
    // A colon is only a blocker when something is glued to it; ": " after
    // an address is ordinary log punctuation and must not hide the IP.
    . '(?![A-Za-z0-9_])(?!:\S)(?!\.\d)'
    . '/u';
|
||||
|
||||
/**
 * Coarse IPv6 candidate matcher. It only proposes candidates; callers
 * are expected to validate the captured text (the "bracketed" or "bare"
 * named group) with filter_var(..., FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
 * before treating a match as an address.
 *
 * Two shapes are recognised:
 *  - "[...]" with an optional ":port" suffix; the text between the
 *    brackets is captured as "bracketed".
 *  - A bare run of 2-7 "hex{0,4}:" groups followed by an optional tail,
 *    captured as "bare". The tail is either one hex group (1-4 digits)
 *    or a dotted quad (IPv4-mapped form).
 *
 * The tail deliberately cannot end in "." — a sentence period after the
 * address ("... 2001:db8::1.") must stay outside the capture, otherwise
 * the candidate fails validation and the address is left unredacted.
 *
 * Boundary lookarounds: the match may not start after [A-Za-z0-9_:] or
 * "<digit>.", and may not be followed by [A-Za-z0-9_:] or ".<digit>".
 */
public const string IPV6_REGEX = '/'
    . '(?<![A-Za-z0-9_:])(?<!\d\.)'
    . '(?:'
    .     '\[(?<bracketed>[0-9a-fA-F:.]+)\](?::\d{1,5})?'
    .     '|'
    .     '(?<bare>(?:[0-9a-fA-F]{0,4}:){2,7}'
    // Hex group is tried first; for a dotted quad the (?!\.\d) lookahead
    // below forces backtracking into the second alternative.
    .         '(?:[0-9a-fA-F]{1,4}|[0-9]{1,3}(?:\.[0-9]{1,3}){3})?)'
    . ')'
    . '(?![A-Za-z0-9_:])(?!\.\d)'
    . '/u';
|
||||
|
||||
/** Regex matching a 17-digit SteamID64 anchored on the 76561198 universe prefix, with lookaround boundaries that reject embedded occurrences. */
|
||||
public const string STEAM_ID_REGEX = '/(?<![A-Za-z0-9])76561198\d{9}(?![A-Za-z0-9])/u';
|
||||
|
||||
@@ -54,10 +86,23 @@ class ProjectZomboidRedactor implements RedactorInterface
|
||||
/** Matches integer coordinate triplets enclosed in round parentheses, anchored on a trailing PvP verb to disambiguate from server-metadata triples (pvp.txt Combat:/Safety: shape); only the attacker/first-coord set is redacted per line — the victim coords lack the trailing keyword and are deferred to v2. */
|
||||
public const string COORDS_PARENTHESISED_REGEX = '/(?<=\()(?<x>\d+),(?<y>\d+),(?<z>-?\d+)(?=\) (?:hit|restore|store|true|false))/u';
|
||||
|
||||
private bool $redactIpAddresses = true;
|
||||
private bool $redactSteamIds = true;
|
||||
private bool $redactPlayerNames = true;
|
||||
private bool $redactCoordinates = true;
|
||||
|
||||
/**
 * Toggle the IP address pass, which handles IPv4 and IPv6 together.
 *
 * Fluent setter: stores the flag on this instance and hands the same
 * instance back so calls can be chained.
 *
 * @param bool $on True turns the pass on, false turns it off.
 * @return static This redactor, for chaining.
 */
public function redactIpAddresses(bool $on): static
{
    $redactor = $this;
    $redactor->redactIpAddresses = $on;

    return $redactor;
}
|
||||
|
||||
/**
|
||||
* Enable or disable the Steam ID redaction pass.
|
||||
*
|
||||
@@ -97,14 +142,31 @@ class ProjectZomboidRedactor implements RedactorInterface
|
||||
/**
|
||||
* Redact PII from the given Project Zomboid log content.
|
||||
*
|
||||
* Passes are applied in the mandatory order: Steam ID -> player name ->
|
||||
* coordinates. See class docblock for rationale.
|
||||
* Passes are applied in the order: IP address -> Steam ID -> player
|
||||
* name -> coordinates. The Steam ID -> name -> coordinates ordering
|
||||
* is mandatory (see class docblock); the IP pass is pattern-disjoint
|
||||
* and runs first by convention.
|
||||
*
|
||||
* @param string $content Raw log content that may contain PII.
|
||||
* @return string Content with enabled PII categories replaced by tokens.
|
||||
*/
|
||||
public function redact(string $content): string
|
||||
{
|
||||
if ($this->redactIpAddresses) {
|
||||
$content = preg_replace_callback(
|
||||
self::IPV6_REGEX,
|
||||
static function (array $matches): string {
|
||||
$candidate = ($matches['bracketed'] ?? '') !== ''
|
||||
? $matches['bracketed']
|
||||
: ($matches['bare'] ?? '');
|
||||
return filter_var($candidate, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false
|
||||
? self::IP_REPLACEMENT
|
||||
: $matches[0];
|
||||
},
|
||||
$content
|
||||
);
|
||||
$content = preg_replace(self::IPV4_REGEX, self::IP_REPLACEMENT, $content);
|
||||
}
|
||||
if ($this->redactSteamIds) {
|
||||
$content = preg_replace(self::STEAM_ID_REGEX, self::STEAM_ID_REPLACEMENT, $content);
|
||||
}
|
||||
|
||||
114
test/tests/Util/Redactor/ProjectZomboidRedactorIpv4Test.php
Normal file
114
test/tests/Util/Redactor/ProjectZomboidRedactorIpv4Test.php
Normal file
@@ -0,0 +1,114 @@
|
||||
<?php
|
||||
|
||||
namespace IndifferentKetchup\Codex\Test\Tests\Util\Redactor;
|
||||
|
||||
use IndifferentKetchup\Codex\Util\ProjectZomboid\ProjectZomboidRedactor;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
class ProjectZomboidRedactorIpv4Test extends TestCase
{
    /**
     * Run text through a freshly constructed redactor with no toggles changed.
     */
    private function redactWithDefaults(string $text): string
    {
        return (new ProjectZomboidRedactor())->redact($text);
    }

    /** A lone dotted quad inside a sentence is replaced by the placeholder. */
    public function testRedactsBareIpv4(): void
    {
        $this->assertSame(
            'Connection from [REDACTED_IP] closed.',
            $this->redactWithDefaults('Connection from 192.168.1.1 closed.')
        );
    }

    /** The :port suffix is consumed with the address; the sentence period stays. */
    public function testRedactsIpv4WithPortSuffix(): void
    {
        $this->assertSame(
            'Connected to [REDACTED_IP].',
            $this->redactWithDefaults('Connected to 10.0.0.42:27015.')
        );
    }

    /** Every address on a line is replaced, not just the first. */
    public function testRedactsMultipleIpv4OnOneLine(): void
    {
        $this->assertSame(
            'Peer [REDACTED_IP] -> [REDACTED_IP] via [REDACTED_IP].',
            $this->redactWithDefaults('Peer 192.168.1.10 -> 192.168.1.20 via 10.0.0.1:8080.')
        );
    }

    /** Loopback plus the lowest and highest representable addresses. */
    public function testRedactsLoopbackAndBoundaryAddresses(): void
    {
        $addresses = [
            '127.0.0.1',
            '0.0.0.0',
            '255.255.255.255',
        ];
        $placeholders = [
            '[REDACTED_IP]',
            '[REDACTED_IP]',
            '[REDACTED_IP]',
        ];

        $this->assertSame(
            implode("\n", $placeholders),
            $this->redactWithDefaults(implode("\n", $addresses))
        );
    }

    /** Octets above 255 fall outside the octet alternation, so nothing changes. */
    public function testDoesNotRedactOutOfRangeOctets(): void
    {
        $untouched = 'Bogus: 999.999.999.999';

        $this->assertSame($untouched, $this->redactWithDefaults($untouched));
    }

    /** A five-segment dotted token must not yield a partial four-segment match. */
    public function testDoesNotRedactInsideLongerDottedSequence(): void
    {
        $untouched = 'Path frag 1.2.3.4.5 should not match.';

        $this->assertSame($untouched, $this->redactWithDefaults($untouched));
    }

    /** Three-segment version strings such as 41.78.16 are not addresses. */
    public function testDoesNotRedactThreeSegmentBuildNumbers(): void
    {
        $untouched = 'Build 41.78.16 starting up.';

        $this->assertSame($untouched, $this->redactWithDefaults($untouched));
    }

    /** With the IP toggle switched off the address and port pass through. */
    public function testToggleOffLeavesIpv4Intact(): void
    {
        $untouched = 'Connection from 192.168.1.1:27015 closed.';
        $redactor = (new ProjectZomboidRedactor())->redactIpAddresses(false);

        $this->assertSame($untouched, $redactor->redact($untouched));
    }

    /** Redacting already-redacted output with the same instance changes nothing. */
    public function testIdempotence(): void
    {
        $log = implode("\n", [
            'Connection from 192.168.1.1:27015 closed.',
            'Peer 10.0.0.42 -> 10.0.0.43 via 172.16.0.1:8080.',
        ]);
        $redactor = new ProjectZomboidRedactor();

        $firstPass = $redactor->redact($log);
        $secondPass = $redactor->redact($firstPass);

        $this->assertSame($firstPass, $secondPass);
    }
}
|
||||
135
test/tests/Util/Redactor/ProjectZomboidRedactorIpv6Test.php
Normal file
135
test/tests/Util/Redactor/ProjectZomboidRedactorIpv6Test.php
Normal file
@@ -0,0 +1,135 @@
|
||||
<?php
|
||||
|
||||
namespace IndifferentKetchup\Codex\Test\Tests\Util\Redactor;
|
||||
|
||||
use IndifferentKetchup\Codex\Util\ProjectZomboid\ProjectZomboidRedactor;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
class ProjectZomboidRedactorIpv6Test extends TestCase
{
    /**
     * Run text through a freshly constructed redactor with no toggles changed.
     */
    private function redactWithDefaults(string $text): string
    {
        return (new ProjectZomboidRedactor())->redact($text);
    }

    /** Eight fully written-out hex groups. */
    public function testRedactsFullIpv6(): void
    {
        $this->assertSame(
            'Bound [REDACTED_IP] ok.',
            $this->redactWithDefaults('Bound 2001:0db8:85a3:0000:0000:8a2e:0370:7334 ok.')
        );
    }

    /** The "::" zero-compression shorthand. */
    public function testRedactsAbbreviatedIpv6(): void
    {
        $this->assertSame(
            'Server peer [REDACTED_IP] connected.',
            $this->redactWithDefaults('Server peer 2001:db8::1 connected.')
        );
    }

    /** The shortest common form, ::1. */
    public function testRedactsLoopbackIpv6(): void
    {
        $this->assertSame(
            'localhost [REDACTED_IP] reachable.',
            $this->redactWithDefaults('localhost ::1 reachable.')
        );
    }

    /** Brackets and the :port suffix collapse into the single placeholder. */
    public function testRedactsBracketedIpv6WithPort(): void
    {
        $this->assertSame(
            'Bound to [REDACTED_IP] ok.',
            $this->redactWithDefaults('Bound to [2001:db8::1]:8080 ok.')
        );
    }

    /** Bracketed loopback with port, directly followed by a sentence period. */
    public function testRedactsBracketedLoopbackWithPort(): void
    {
        $this->assertSame(
            'Listening on [REDACTED_IP].',
            $this->redactWithDefaults('Listening on [::1]:27015.')
        );
    }

    /**
     * IPv4-mapped form. redact() runs the IPv6 replacement before the IPv4
     * one, so the "::ffff:" prefix and the dotted quad are expected to
     * disappear together rather than leaving the prefix behind.
     */
    public function testRedactsIpv4MappedIpv6(): void
    {
        $this->assertSame(
            'Mapped [REDACTED_IP] ok.',
            $this->redactWithDefaults('Mapped ::ffff:192.168.1.1 ok.')
        );
    }

    /** Scope-resolution / method-reference "::" between identifiers is left alone. */
    public function testDoesNotRedactJavaScopeOperator(): void
    {
        $untouched = 'Foo::bar called Object::toString.';

        $this->assertSame($untouched, $this->redactWithDefaults($untouched));
    }

    /** A PZ log line prefix with an hh:mm:ss.mmm timestamp must come through unchanged. */
    public function testDoesNotRedactTimestampShape(): void
    {
        $untouched = '[16-04-26 12:00:00.000][LOG] startup complete';

        $this->assertSame($untouched, $this->redactWithDefaults($untouched));
    }

    /**
     * Guard against a future IPv6 pattern change colliding with Steam IDs.
     * A 17-digit Steam ID contains no colons, so the IP pass should ignore
     * it; the expected text is whatever the remaining (default-enabled)
     * passes produce — presumably the Steam ID placeholder; confirm against
     * STEAM_ID_REPLACEMENT.
     */
    public function testDoesNotRedactSteamIdAsIpv6(): void
    {
        $this->assertSame(
            'Player 76561198000000000 joined.',
            $this->redactWithDefaults('Player 76561198111111111 joined.')
        );
    }

    /** With the IP toggle switched off the bracketed address and port pass through. */
    public function testToggleOffLeavesIpv6Intact(): void
    {
        $untouched = 'Bound to [2001:db8::1]:8080 ok.';
        $redactor = (new ProjectZomboidRedactor())->redactIpAddresses(false);

        $this->assertSame($untouched, $redactor->redact($untouched));
    }

    /** Redacting already-redacted output with the same instance changes nothing. */
    public function testIdempotence(): void
    {
        $log = implode("\n", [
            'Server peer 2001:db8::1 connected.',
            'Listening on [::1]:27015.',
            'Mapped ::ffff:192.168.1.1 ok.',
            '[16-04-26 12:00:00.000][LOG] startup complete',
        ]);
        $redactor = new ProjectZomboidRedactor();

        $firstPass = $redactor->redact($log);
        $secondPass = $redactor->redact($firstPass);

        $this->assertSame($firstPass, $secondPass);
    }
}
|
||||
Reference in New Issue
Block a user