I've gotten several questions on how you might implement Benford's Law, which I covered in a previous post. If you haven't read that post and aren't familiar with the law, you'll need to look it over for this one to make sense.
As a reminder, non-random numbers (which include financial transactions) should occur at the following frequencies for their first digits: 1 – 30.1%, 2 – 17.6%, 3 – 12.5%, 4 – 9.7%, 5 – 7.9%, 6 – 6.7%, 7 – 5.8%, 8 – 5.1%, 9 – 4.6%.
We will keep this C# code fairly straightforward so anybody can try it out. Therefore, rather than a database or anything too fancy, we'll just import values from a CSV file stored at C:\Transactions.csv on your local drive. We will then parse each row as a double and store it in an array.
/// <summary>
/// Loads every line of C:\Transactions.csv and parses it as a US-formatted
/// currency amount.
/// </summary>
/// <returns>
/// One amount per line of the file, in file order; null when the file is
/// missing or when any line cannot be parsed (a message is printed first).
/// </returns>
private static double[] GetTransactionsFromFile()
{
    try
    {
        string[] rows = File.ReadAllLines(@"C:\Transactions.csv");
        double[] amounts = new double[rows.Length];

        for (int row = 0; row < rows.Length; row++)
        {
            double amount;
            bool parsed = double.TryParse(rows[row], NumberStyles.Currency, new CultureInfo("en-US"), out amount);

            // Bail out on the first bad row; the message uses a 1-based line number.
            if (!parsed)
            {
                Console.WriteLine("Line #" + (row + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                return null;
            }

            amounts[row] = amount;
        }

        return amounts;
    }
    catch (FileNotFoundException missingFile)
    {
        Console.Write(missingFile.Message);
        Console.WriteLine(" Did you copy the example file or create a new file at that location?");
        return null;
    }
}
This line of code, excerpted from the above section, will properly handle currency symbols and thousands separators. Note that it always applies US (en-US) formatting rules, regardless of the computer's locale.
double.TryParse(allLines[i], NumberStyles.Currency, new CultureInfo("en-US"), out lineValue)
This will pull the first digit of every value in the array:
/// <summary>
/// Counts how many transactions have each leading significant digit.
/// </summary>
/// <param name="allTransactions">
/// Transaction amounts. A null array is treated as empty.
/// </param>
/// <returns>
/// A ten-element array where index d (1-9) holds the number of amounts whose
/// first non-zero digit is d. Index 0 counts amounts that are exactly zero.
/// NaN and infinite values are not counted.
/// </returns>
private static double[] GetFirstDigitFrequency(double[] allTransactions)
{
    double[] firstDigitFrequency = new double[10];
    if (allTransactions == null)
    {
        return firstDigitFrequency;
    }

    for (int i = 0; i < allTransactions.Length; i++)
    {
        double amount = allTransactions[i];
        if (double.IsNaN(amount) || double.IsInfinity(amount))
        {
            continue;
        }

        // Scan for the first non-zero digit instead of taking the first
        // character: this skips a leading minus sign, the "0." of amounts
        // below 1, and is unaffected by the current culture's formatting.
        string text = Math.Abs(amount).ToString(CultureInfo.InvariantCulture);
        int firstDigit = 0;
        for (int j = 0; j < text.Length; j++)
        {
            char c = text[j];
            if (c >= '1' && c <= '9')
            {
                firstDigit = c - '0';
                break;
            }
        }

        firstDigitFrequency[firstDigit]++;
    }

    return firstDigitFrequency;
}
This will output any first digits that don't fall within a tolerance of 50% above or below the expected rate of occurrence. It omits first digit frequencies that lack any transactions and also prints a warning if you don't have at least 25 transactions.
/// <summary>
/// Compares the observed share of each leading digit (1-9) with the share
/// predicted by Benford's Law, log10(1 + 1/d), and prints every digit whose
/// observed share is more than 50% above or below the prediction.
/// </summary>
/// <param name="firstDigitFrequency">
/// Ten-element array of counts indexed by leading digit. Index 0 is ignored
/// because 0 is never a leading significant digit.
/// </param>
private static void OutputResults(double[] firstDigitFrequency)
{
    double allowedTolerance = 0.5;

    // Only slots 1-9 take part in the test, so the shares are computed
    // against their combined count; the Benford rates for 1-9 sum to 1.
    double total = firstDigitFrequency.Skip(1).Sum();
    double frequencyPercentage;
    double benfordPercentage;

    if (total < 25)
    {
        Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
    }

    for (int i = 1; i < 10; i++)
    {
        // Digits without any transactions are omitted from the report, but
        // the remaining digits must still be checked (hence continue, not break).
        if (firstDigitFrequency[i] == 0)
        {
            continue;
        }

        frequencyPercentage = firstDigitFrequency[i] / total;
        benfordPercentage = Math.Log10(1 + 1.0 / i);

        bool tooLow = frequencyPercentage < (benfordPercentage * (1 - allowedTolerance));
        bool tooHigh = frequencyPercentage > (benfordPercentage * (1 + allowedTolerance));
        if (tooLow || tooHigh)
        {
            Console.WriteLine("Review transactions with a first digit of: " + i);
        }
    }

    Console.WriteLine("Complete.");
}
The complete source code:
using System;
using System.Globalization;
using System.IO;
using System.Linq;

namespace ClarkOnCode_FraudDetection
{
    /// <summary>
    /// Console program that runs a Benford's Law first-digit test over the
    /// amounts listed in C:\Transactions.csv.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the transactions, runs the first-digit analysis when loading
        /// succeeded, then waits for Enter so the console output stays visible.
        /// </summary>
        static void Main(string[] args)
        {
            double[] allTransactions = GetTransactionsFromFile();

            // A null result means loading failed and a message was already
            // printed; skip the analysis instead of crashing on the null array.
            if (allTransactions != null)
            {
                double[] firstDigitFrequency = GetFirstDigitFrequency(allTransactions);
                OutputResults(firstDigitFrequency);
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Compares the observed share of each leading digit (1-9) with the
        /// share predicted by Benford's Law, log10(1 + 1/d), and prints every
        /// digit whose observed share is more than 50% above or below it.
        /// </summary>
        /// <param name="firstDigitFrequency">
        /// Ten-element array of counts indexed by leading digit. Index 0 is
        /// ignored because 0 is never a leading significant digit.
        /// </param>
        private static void OutputResults(double[] firstDigitFrequency)
        {
            double allowedTolerance = 0.5;

            // Only slots 1-9 take part in the test, so the shares are computed
            // against their combined count; the Benford rates for 1-9 sum to 1.
            double total = firstDigitFrequency.Skip(1).Sum();
            double frequencyPercentage;
            double benfordPercentage;

            if (total < 25)
            {
                Console.WriteLine("You have a low number of transactions which reduces the effectiveness of the test.");
            }

            for (int i = 1; i < 10; i++)
            {
                // Digits without any transactions are omitted from the report,
                // but the remaining digits must still be checked.
                if (firstDigitFrequency[i] == 0)
                {
                    continue;
                }

                frequencyPercentage = firstDigitFrequency[i] / total;
                benfordPercentage = Math.Log10(1 + 1.0 / i);

                bool tooLow = frequencyPercentage < (benfordPercentage * (1 - allowedTolerance));
                bool tooHigh = frequencyPercentage > (benfordPercentage * (1 + allowedTolerance));
                if (tooLow || tooHigh)
                {
                    Console.WriteLine("Review transactions with a first digit of: " + i);
                }
            }

            Console.WriteLine("Complete.");
        }

        /// <summary>
        /// Counts how many transactions have each leading significant digit.
        /// </summary>
        /// <param name="allTransactions">
        /// Transaction amounts. A null array is treated as empty.
        /// </param>
        /// <returns>
        /// A ten-element array where index d (1-9) holds the number of amounts
        /// whose first non-zero digit is d. Index 0 counts amounts that are
        /// exactly zero. NaN and infinite values are not counted.
        /// </returns>
        private static double[] GetFirstDigitFrequency(double[] allTransactions)
        {
            double[] firstDigitFrequency = new double[10];
            if (allTransactions == null)
            {
                return firstDigitFrequency;
            }

            for (int i = 0; i < allTransactions.Length; i++)
            {
                double amount = allTransactions[i];
                if (double.IsNaN(amount) || double.IsInfinity(amount))
                {
                    continue;
                }

                // Scan for the first non-zero digit instead of taking the
                // first character: this skips a leading minus sign and the
                // "0." of amounts below 1, and ignores the current culture.
                string text = Math.Abs(amount).ToString(CultureInfo.InvariantCulture);
                int firstDigit = 0;
                for (int j = 0; j < text.Length; j++)
                {
                    char c = text[j];
                    if (c >= '1' && c <= '9')
                    {
                        firstDigit = c - '0';
                        break;
                    }
                }

                firstDigitFrequency[firstDigit]++;
            }

            return firstDigitFrequency;
        }

        /// <summary>
        /// Loads every line of C:\Transactions.csv and parses it as a
        /// US-formatted currency amount.
        /// </summary>
        /// <returns>
        /// One amount per line of the file, in file order; null when the file
        /// is missing or any line cannot be parsed (a message is printed first).
        /// </returns>
        private static double[] GetTransactionsFromFile()
        {
            try
            {
                string[] allLines = File.ReadAllLines(@"C:\Transactions.csv");
                double[] allTransactions = new double[allLines.Length];

                for (int i = 0; i < allLines.Length; i++)
                {
                    double lineValue;
                    if (double.TryParse(allLines[i], NumberStyles.Currency, new CultureInfo("en-US"), out lineValue))
                    {
                        allTransactions[i] = lineValue;
                    }
                    else
                    {
                        // The reported line number is 1-based.
                        Console.WriteLine("Line #" + (i + 1) + " is not a valid dollar amount. Please fix and restart the program.");
                        return null;
                    }
                }

                return allTransactions;
            }
            catch (FileNotFoundException ex)
            {
                Console.Write(ex.Message);
                Console.WriteLine(" Did you copy the example file or create a new file at that location?");
            }

            return null;
        }
    }
}