Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6704b505 authored by AVINASH GUSAIN's avatar AVINASH GUSAIN
Browse files

fix:script for cleanup stale entries

parent d21165c7
Loading
Loading
Loading
Loading
+71 −0
Original line number Diff line number Diff line
@@ -152,3 +152,74 @@ occ email-recovery:recovery-warning-notification 2>> /var/log/nextcloud/recovery
# Log with timestamps
occ email-recovery:recovery-warning-notification 2>&1 | while IFS= read -r line; do echo "$(date '+%Y-%m-%d %H:%M:%S') $line"; done >> /var/log/nextcloud/recovery-timestamped.log
```


### Spam Account Filter Command

This document explains how to use the new `FilterLegitimateDomainsFromSpamReport` command to filter out legitimate domains from spam account reports.

## Overview

The command processes a spam account report file and removes entries that belong to popular (legitimate) email domains, creating a new file with only true spam entries.

## Usage

```bash
occ email-recovery:filter-legitimate-domains <input-file> [options] > <output-file>
```

### Arguments

- `input-file`: Path to the spam account report file (e.g., `spam-account-report-2025-07-23.txt`)
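- Output: the command takes no output-file argument. Filtered entries are printed to standard output, one per line; redirect it (e.g. `> filtered-spam-report.txt`) to create the filtered file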

### Options

- `--include-recovery-email`: Include recovery email addresses in the output (format: `userid,recovery-email`)
- `--quiet`, `-q`: Suppress progress messages and only output filtered data

## Examples

### Basic Usage
```bash
occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt > filtered-spam-report.txt
```

### Include Recovery Email Addresses
```bash
occ email-recovery:filter-legitimate-domains spam-account-report-2025-07-23.txt --include-recovery-email > filtered-spam-report.txt
```

## How It Works

1. **Reads the input file**: Parses each line as a user ID from the spam report
2. **Gets recovery email**: Retrieves the recovery email address for each user
3. **Validates recovery email**: Uses the same validation logic as `getAllSpamEmails()` in `RecoveryEmailService`
4. **Filters entries**: Keeps only entries that fail validation (true spam) and filters out legitimate domains
5. **Writes the output**: Prints the filtered results to standard output, which can be redirected to a file

The filtering uses the exact same logic as the spam detection system, ensuring consistency and accuracy.

## Output Format

### Without --include-recovery-email
```
user1
user2
user3
```

### With --include-recovery-email
```
user1,spam@example.com
user2,fake@disposable.com
user3,temp@throwaway.net
```

## Summary Information

The command writes a summary to the Nextcloud log (info level) showing:
- Total entries processed
- Legitimate domains (filtered out)
- Spam entries (kept)
- Invalid entries (skipped)
+1 −0
Original line number Diff line number Diff line
@@ -24,5 +24,6 @@
		<command>OCA\EmailRecovery\Command\ResetDisposableDomainsList</command>
		<command>OCA\EmailRecovery\Command\AdminBlacklistedDomains</command>
		<command>OCA\EmailRecovery\Command\RecoveryWarningNotificationCommand</command>
		<command>OCA\EmailRecovery\Command\FilterLegitimateDomainsFromSpamReport</command>
	</commands>
</info>
+102 −0
Original line number Diff line number Diff line
<?php

declare(strict_types=1);

namespace OCA\EmailRecovery\Command;

use OCA\EmailRecovery\AppInfo\Application;
use OCA\EmailRecovery\Service\SpamFilterService;
use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * occ command that re-checks the user IDs listed in a spam account report and
 * prints the ones that are still classified as spam.
 *
 * The filtered entries are the only thing written to the console; progress and
 * the summary go to the logger, so the command's stdout can be redirected
 * straight into a file.
 */
class FilterLegitimateDomainsFromSpamReport extends Command {
	private SpamFilterService $spamFilterService;
	private LoggerInterface $logger;

	public function __construct(
		SpamFilterService $spamFilterService,
		LoggerInterface $logger
	) {
		parent::__construct();
		$this->spamFilterService = $spamFilterService;
		$this->logger = $logger;
	}

	/**
	 * Declares the command name, its single required argument and its options.
	 */
	protected function configure(): void {
		$this
			->setName(Application::APP_ID . ':filter-legitimate-domains')
			->setDescription('Filter out legitimate domains from spam account report and output to console')
			->addArgument('input-file', InputArgument::REQUIRED, 'Path to the spam account report file')
			->addOption('include-recovery-email', null, InputOption::VALUE_NONE, 'Include recovery email addresses in output')
			// Kept so existing invocations and --help stay unchanged. The flag is not read
			// here: the filtered data is written at quiet verbosity (see outputFilteredResults()).
			->addOption('quiet', 'q', InputOption::VALUE_NONE, 'Suppress progress messages and only output filtered data');
	}

	/**
	 * Reads the report, filters it through SpamFilterService and prints the
	 * remaining spam entries.
	 *
	 * @return int Command::SUCCESS when the report was processed, Command::FAILURE
	 *             when the input file could not be read or any error was thrown
	 */
	protected function execute(InputInterface $input, OutputInterface $output): int {
		try {
			$inputFile = $input->getArgument('input-file');
			$includeRecoveryEmail = $input->getOption('include-recovery-email');

			// Validate and read input file (the service reports read errors on $output)
			$spamUserIds = $this->spamFilterService->readSpamReportFile($inputFile, $output);
			if ($spamUserIds === null) {
				return Command::FAILURE;
			}

			// Process the spam users and filter legitimate domains
			$results = $this->spamFilterService->processSpamUsers($spamUserIds, $includeRecoveryEmail, $output);

			// Log summary
			$this->logSummary($results);

			// Write filtered results to console (data only)
			$this->outputFilteredResults($results['filteredEntries'], $output);

			return Command::SUCCESS;
		} catch (\Throwable $e) {
			$this->logger->error('Error while filtering spam report: ' . $e->getMessage());
			$output->writeln('<error>Error: ' . $e->getMessage() . '</error>');
			return Command::FAILURE;
		}
	}

	/**
	 * Logs the filtering summary.
	 *
	 * The invalid count is derived: every entry that was reported neither as
	 * legitimate nor as spam is counted as invalid (skipped).
	 *
	 * @param array $results Result of SpamFilterService::processSpamUsers(); reads the
	 *                       'totalEntries', 'legitimateCount' and 'spamCount' keys
	 */
	private function logSummary(array $results): void {
		$totalEntries = $results['totalEntries'];
		$legitimateCount = $results['legitimateCount'];
		$spamCount = $results['spamCount'];
		$invalidCount = $totalEntries - $legitimateCount - $spamCount;

		$this->logger->info('=== Filtering Summary ===');
		$this->logger->info('Total entries processed: ' . $totalEntries);
		$this->logger->info('Legitimate domains (filtered out): ' . $legitimateCount);
		$this->logger->info('Spam entries (kept): ' . $spamCount);
		$this->logger->info('Invalid entries (skipped): ' . $invalidCount);
	}

	/**
	 * Writes the filtered entries to the console, one per line, and logs status.
	 *
	 * @param array $filteredEntries Lines to print ("userid" or "userid,recovery-email")
	 * @param OutputInterface $output Console output the entries are written to
	 */
	private function outputFilteredResults(array $filteredEntries, OutputInterface $output): void {
		// no entries to write
		if (empty($filteredEntries)) {
			$this->logger->info('No spam entries to output (all were legitimate domains)');
			return;
		}

		$this->logger->info('Outputting ' . count($filteredEntries) . ' filtered entries to console');

		// Go through the console output instead of a bare echo so the data can be
		// captured by output buffers/testers. OUTPUT_RAW prints each entry verbatim
		// (no formatter tag parsing); VERBOSITY_QUIET keeps the data visible even
		// when the command runs with -q/--quiet.
		$output->writeln($filteredEntries, OutputInterface::OUTPUT_RAW | OutputInterface::VERBOSITY_QUIET);

		$this->logger->info('Successfully output ' . count($filteredEntries) . ' entries to console');
	}
}
+103 −13
Original line number Diff line number Diff line
@@ -682,6 +682,47 @@ class RecoveryEmailService {
	
		return false;
	}
	/**
	 * Classifies a single user's recovery email as spam or not spam.
	 *
	 * Shared decision routine behind getAllSpamEmails and
	 * filterLegitimateDomainsFromSpamUsers. The subscription check runs first;
	 * the recovery email itself is only validated when that check neither
	 * succeeds nor throws.
	 *
	 * @param string $userId The user ID being checked
	 * @param string $recoveryEmail The recovery email to validate
	 * @param string $userEmail The user's main email address (used for the subscription check)
	 * @return array Array with an 'isSpam' boolean and a 'reason' string, one of:
	 *               'active_subscription', 'subscription_check_error',
	 *               'validation_passed', 'validation_failed', 'validation_error'
	 */
	private function validateUserRecoveryEmail(string $userId, string $recoveryEmail, string $userEmail): array {
		// Small builder so every exit point yields the same result shape.
		$verdict = static function (bool $isSpam, string $reason): array {
			return ['isSpam' => $isSpam, 'reason' => $reason];
		};

		// Step 1: users with an active subscription are never flagged.
		try {
			$subscribed = $this->hasActiveSubscription($userEmail);
			if ($subscribed) {
				$this->logger->info("User $userId has an active subscription. Skipping spam flag for <$recoveryEmail>.");
				return $verdict(false, 'active_subscription');
			}
		} catch (\Throwable $e) {
			// A failing subscription lookup is treated as "not spam".
			$this->logger->error("Error checking subscription for $userId <$userEmail>: " . $e->getMessage());
			return $verdict(false, 'subscription_check_error');
		}

		// Step 2: run the recovery email through the regular validation.
		try {
			$passedValidation = $this->validateRecoveryEmail($recoveryEmail, $userId);

			return $passedValidation
				? $verdict(false, 'validation_passed')
				: $verdict(true, 'validation_failed');
		} catch (BlacklistedEmailException | InvalidRecoveryEmailException $e) {
			// These two exception types mean the address was rejected: spam.
			$this->logger->info("Validation failed (spam) for $userId <$recoveryEmail>: " . $e->getMessage());
			return $verdict(true, 'validation_failed');
		} catch (\Throwable $e) {
			// Any other failure is inconclusive and does not flag the user.
			$this->logger->info("Error while checking $userId <$recoveryEmail>: " . $e->getMessage());
			return $verdict(false, 'validation_error');
		}
	}

	/**
	 * Scans all verified recovery email addresses and returns a list of spam accounts.
	 *
@@ -721,28 +762,77 @@ class RecoveryEmailService {
				continue;
			}

			try {
				if ($this->hasActiveSubscription($email)) {
					$this->logger->info("User $userId has an active subscription. Skipping spam flag for <$recoveryEmail>.");
			$validation = $this->validateUserRecoveryEmail($userId, $recoveryEmail, $email);
			if ($validation['isSpam']) {
				$onSpamDetected($userId, $recoveryEmail);
			}
		}
	}

	/**
	 * Filters legitimate domains from a list of user IDs that were flagged as spam.
	 *
	 * Each user ID is trimmed and lower-cased, then resolved to a user, a main
	 * email address and a recovery email. Entries for which any of these is
	 * missing are reported through $onInvalidDetected with one of the reasons
	 * 'empty_user_id', 'user_not_found', 'no_email_address' or 'no_recovery_email'.
	 * The remaining entries are classified with validateUserRecoveryEmail() and
	 * reported through $onSpamDetected or $onLegitimateDetected.
	 *
	 * @param array $spamUserIds Array of user IDs from spam report
	 * @param callable $onSpamDetected Callback function with signature fn(string $userId, string $recoveryEmail): void
	 * @param callable|null $onLegitimateDetected Optional callback function with signature fn(string $userId, string $recoveryEmail): void
	 * @param callable|null $onInvalidDetected Optional callback function with signature fn(string $userId, string $reason): void
	 * @return void
	 */
	public function filterLegitimateDomainsFromSpamUsers(array $spamUserIds, callable $onSpamDetected, ?callable $onLegitimateDetected = null, ?callable $onInvalidDetected = null): void {
		foreach ($spamUserIds as $userId) {
			$userId = strtolower(trim($userId));

			if ($userId === '') {
				if ($onInvalidDetected !== null) {
					$onInvalidDetected($userId, 'empty_user_id');
				}
				continue;
			}

			$user = $this->userManager->get($userId);
			if ($user === null) {
				$this->logger->info("User not found: $userId");
				if ($onInvalidDetected !== null) {
					$onInvalidDetected($userId, 'user_not_found');
				}
				continue;
			}

			$email = $user->getEMailAddress();
			if (empty($email)) {
				$this->logger->info("No email address found for user: $userId");
				if ($onInvalidDetected !== null) {
					$onInvalidDetected($userId, 'no_email_address');
				}
				continue;
			}

			// Get recovery email for this user
			$recoveryEmail = $this->getRecoveryEmail($userId);
			if (empty($recoveryEmail)) {
				$this->logger->info("No recovery email found for user: $userId");
				if ($onInvalidDetected !== null) {
					$onInvalidDetected($userId, 'no_recovery_email');
				}
				continue;
			}

			$validation = $this->validateUserRecoveryEmail($userId, $recoveryEmail, $email);

			if ($validation['isSpam']) {
				$onSpamDetected($userId, $recoveryEmail);
			} elseif ($onLegitimateDetected !== null) {
				// This is legitimate (validation passed or other non-spam reason)
				$onLegitimateDetected($userId, $recoveryEmail);
			}
		}
	}

	/** Recovery email reminder start date **/
	public function getRecoveryEmailReminderStartDate(string $uid): ?string {
		return $this->config->getUserValue($uid, $this->appName, self::RECOVERY_EMAIL_REMINDER_START_DATE, null);
+110 −0
Original line number Diff line number Diff line
<?php

declare(strict_types=1);

namespace OCA\EmailRecovery\Service;

use Psr\Log\LoggerInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Reads a spam account report and splits its user IDs into spam entries
 * (kept) and legitimate entries (filtered out), delegating the per-user
 * decision to RecoveryEmailService::filterLegitimateDomainsFromSpamUsers().
 */
class SpamFilterService {
	private RecoveryEmailService $recoveryEmailService;
	private LoggerInterface $logger;
	/** State shared with the callback methods while processSpamUsers() runs; empty otherwise. */
	private array $processingContext = [];

	public function __construct(
		RecoveryEmailService $recoveryEmailService,
		LoggerInterface $logger
	) {
		$this->recoveryEmailService = $recoveryEmailService;
		$this->logger = $logger;
	}

	/**
	 * Validates and reads the spam report input file.
	 *
	 * @param string $inputFile Path to the report, one user ID per line
	 * @param OutputInterface $output Console output used to report read errors
	 * @return array|null Re-indexed list of report lines without blank lines and
	 *                    without the 'Spam user list:' header, or null when the
	 *                    file is missing or cannot be read
	 */
	public function readSpamReportFile(string $inputFile, OutputInterface $output): ?array {
		// Check if input file exists
		if (!file_exists($inputFile)) {
			$output->writeln('<error>Input file does not exist: ' . $inputFile . '</error>');
			return null;
		}

		$this->logger->info('Reading spam account report from: ' . $inputFile);

		// file() raises a PHP warning for directories and unreadable paths;
		// report those as a plain read failure instead.
		if (!is_file($inputFile) || !is_readable($inputFile)) {
			$output->writeln('<error>Failed to read input file</error>');
			return null;
		}

		// Read the input file
		$lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
		if ($lines === false) {
			$output->writeln('<error>Failed to read input file</error>');
			return null;
		}

		// Filter out header lines and empty lines. Compare against '' rather than
		// using empty(): empty('0') is true and would drop a user ID of "0".
		$spamUserIds = array_filter($lines, static function (string $line): bool {
			$line = trim($line);
			return $line !== '' && $line !== 'Spam user list:';
		});

		return array_values($spamUserIds); // Re-index array
	}

	/**
	 * Processes spam users and filters out legitimate domains.
	 *
	 * @param array $spamUserIds User IDs taken from the spam report
	 * @param bool $includeRecoveryEmail When true, kept entries are formatted as
	 *                                   "userId,recoveryEmail" instead of "userId"
	 * @param OutputInterface $output Console output, made available to the callbacks
	 * @return array Array with the keys 'totalEntries', 'legitimateCount',
	 *               'spamCount' and 'filteredEntries' (the kept spam entries)
	 */
	public function processSpamUsers(array $spamUserIds, bool $includeRecoveryEmail, OutputInterface $output): array {
		$totalEntries = count($spamUserIds);
		$legitimateCount = 0;
		$spamCount = 0;
		$filteredEntries = [];

		$this->logger->info('Processing ' . $totalEntries . ' entries...');

		// Set the context for the callback methods; counters and the entry list
		// are shared by reference so the callbacks update the locals above.
		$this->processingContext = [
			'spamCount' => &$spamCount,
			'legitimateCount' => &$legitimateCount,
			'filteredEntries' => &$filteredEntries,
			'includeRecoveryEmail' => $includeRecoveryEmail,
			'output' => $output
		];

		try {
			// Use the service method to filter legitimate domains
			$this->recoveryEmailService->filterLegitimateDomainsFromSpamUsers(
				$spamUserIds,
				// Callback for spam entries (keep them)
				[$this, 'onSpamDetected'],
				// Callback for legitimate entries (filter them out)
				[$this, 'onLegitimateDetected']
			);
		} finally {
			// Drop the references so no per-run state lingers on the service.
			$this->processingContext = [];
		}

		return [
			'totalEntries' => $totalEntries,
			'legitimateCount' => $legitimateCount,
			'spamCount' => $spamCount,
			'filteredEntries' => $filteredEntries
		];
	}

	/**
	 * Callback method for spam entries (keep them).
	 *
	 * Increments the spam counter and appends the entry to the filtered list.
	 * Relies on the context set up by processSpamUsers().
	 */
	public function onSpamDetected(string $userId, string $recoveryEmail): void {
		$this->processingContext['spamCount']++;

		if ($this->processingContext['includeRecoveryEmail']) {
			$this->processingContext['filteredEntries'][] = "$userId,$recoveryEmail";
		} else {
			$this->processingContext['filteredEntries'][] = $userId;
		}

		$this->logger->info("KEEPING SPAM: $userId -> $recoveryEmail");
	}

	/**
	 * Callback method for legitimate entries (filter them out).
	 *
	 * Only increments the legitimate counter; the entry is not kept.
	 * Relies on the context set up by processSpamUsers().
	 */
	public function onLegitimateDetected(string $userId, string $recoveryEmail): void {
		$this->processingContext['legitimateCount']++;
		$this->logger->info("REMOVING LEGITIMATE: $userId -> $recoveryEmail");
	}
}