An example of usage of a Trie data structure
This is the solution to problem http://acm.tju.edu.cn/toj/showp1016.html. I found this problem to be a little complicated, and although the code below solves it I’m inclined to believe this isn’t the most optimal code. There are several techniques worth-mentioning in the solution to this problem: first, the use of a Trie data structure to represent the words in the dictionary. Notice that a Trie is nothing but a tree with each node having potentially multiple child nodes. It is usually a very sparse tree, though. Use the same technique of building a tree: start with a private Node class, and go from there. The use of hashtables come very handy in building the Trie. Next, write a (non-recursive) function to detect whether a certain word belongs to the dictionary, if so return the cumulative probability for that word (or -1 if the word isn’t there). Finally, you’ll need to write some (recursive) code to generate the potential permutations given a sequence of digits. Because you don’t know a priori which combination will yield the highest probability there seems to be a need to generate them all (with a quick optimization along the way: if at some point you found a “MANUAL” prefix, cut the search down. Mathematically, if there isn’t a path in the tree for the prefix P, then there won’t be a path in the tree for any prefix P+<anystring>).
Program.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace T9
{
class Program
{
static void Main(string[] args)
{
if (args.Length == 0)
{
Usage();
return;
}
FileInfo fi = new FileInfo(args[0]);
StreamReader sr = fi.OpenText();
int numberOfScenarios = Convert.ToInt32(sr.ReadLine());
for (int iScenario = 0; iScenario < numberOfScenarios; iScenario++)
{
WordsDictionaryTrie wordsTrie = new WordsDictionaryTrie();
int numberOfWordsInDictionary = Convert.ToInt32(sr.ReadLine());
for (int iWord = 0; iWord < numberOfWordsInDictionary; iWord++)
{
string[] lineParts = sr.ReadLine().Split(' ');
string word = lineParts[0];
int probability = Convert.ToInt32(lineParts[1]);
wordsTrie.AddWord(word, probability);
}
Console.WriteLine("Scenario #{0}:", iScenario+1);
int numberOfSequencesOfDigits = Convert.ToInt32(sr.ReadLine());
for (int iSequence = 0; iSequence < numberOfSequencesOfDigits; iSequence++)
{
string sequenceOfDigits = sr.ReadLine();
wordsTrie.PrintT9Words(sequenceOfDigits);
Console.WriteLine();
}
}
sr.Close();
}
static void Usage()
{
Console.WriteLine("Usage: T9.exe <input file>");
Console.WriteLine("For more info visit http://acm.tju.edu.cn/toj/showp1016.html");
}
}
}
WordsDictionaryTrie.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
namespace T9
{
class WordsDictionaryTrie
{
class Node
{
public char letter;
public int probabilityWordEnding;
public Hashtable htNextNodes;
private Node() { }
public Node(char letter,
int probabilityWordEnding)
{
this.letter = letter;
this.probabilityWordEnding = probabilityWordEnding;
this.htNextNodes = null;
}
}
private Node root;
private Hashtable wordsAlreadyPrinted;
public WordsDictionaryTrie()
{
this.root = new Node('*',
0);
this.wordsAlreadyPrinted = new Hashtable();
}
public void AddWord(string word,
int probability)
{
_AddWord(this.root,
word,
0,
probability);
}
private void _AddWord(Node currentNode,
string word,
int currentPosition,
int probability)
{
if (currentNode == null ||
currentPosition >= word.Length ||
word == null ||
word.Length == 0)
{
return;
}
if (currentNode.htNextNodes == null)
{
currentNode.htNextNodes = new Hashtable();
}
if (!currentNode.htNextNodes.ContainsKey(word[currentPosition]))
{
Node newNode = new Node(word[currentPosition],
0);
currentNode.htNextNodes.Add(word[currentPosition],
newNode);
}
currentNode = (Node)currentNode.htNextNodes[word[currentPosition]];
currentNode.probabilityWordEnding += probability;
_AddWord(currentNode,
word,
currentPosition + 1,
probability);
}
public void PrintT9Words(string sequenceOfDigits)
{
if (sequenceOfDigits == null)
{
return;
}
Hashtable htNumberKeys = new Hashtable();
htNumberKeys.Add('2', "abc");
htNumberKeys.Add('3', "def");
htNumberKeys.Add('4', "ghi");
htNumberKeys.Add('5', "jkl");
htNumberKeys.Add('6', "mno");
htNumberKeys.Add('7', "pqrs");
htNumberKeys.Add('8', "tuv");
htNumberKeys.Add('9', "wxyz");
bool prefixDoesNotExist = false;
for (int i = 0; i < sequenceOfDigits.Length - 1; i++)
{
string prefixedSequence = sequenceOfDigits.Substring(0, i + 1);
string bestWord = "";
int bestProbability = Int32.MinValue;
if (!prefixDoesNotExist)
{
_GetWordWithBestProbability(ref htNumberKeys,
prefixedSequence,
0,
"",
ref bestWord,
ref bestProbability);
}
if (bestWord == "")
{
Console.WriteLine("MANUALLY");
prefixDoesNotExist = true;
}
else
{
Console.WriteLine("{0} (debugging: overall prefix probability = {1})", bestWord, bestProbability);
}
}
}
private string _CompareStringsAlphabeticalOrder(string s1,
string s2)
{
if (s1 == null ||
s2 == null)
{
return null;
}
int minLen = Math.Min(s1.Length,
s2.Length);
for (int i = 0; i < minLen; i++)
{
if (s1[i] < s2[i])
{
return s1;
}
if (s2[i] < s1[i])
{
return s2;
}
}
if (s1.Length < s2.Length)
{
return s1;
}
else
{
return s2;
}
}
private void _GetWordWithBestProbability(ref Hashtable htNumberKeys,
string sequenceOfDigits,
int currentPosition,
string currentWord,
ref string bestWord,
ref int bestProbability)
{
if (currentPosition >= sequenceOfDigits.Length)
{
int currentProbability = _GetWordProbability(currentWord);
if (currentProbability != -1 &&
currentProbability > bestProbability)
{
bestProbability = currentProbability;
bestWord = currentWord;
}
else if (currentProbability == bestProbability)
{
bestWord = _CompareStringsAlphabeticalOrder(bestWord, currentWord);
}
return;
}
if (htNumberKeys.ContainsKey(sequenceOfDigits[currentPosition]))
{
string mappedLetters = (string)htNumberKeys[sequenceOfDigits[currentPosition]];
foreach (char mappedLetter in mappedLetters)
{
_GetWordWithBestProbability(ref htNumberKeys,
sequenceOfDigits,
currentPosition + 1,
currentWord + mappedLetter.ToString(),
ref bestWord,
ref bestProbability);
}
}
}
private int _GetWordProbability(string word)
{
int probability = -1;
if (word == null)
{
return probability;
}
Node currentNode = this.root;
for (int i = 0; i < word.Length; i++)
{
if (currentNode.htNextNodes != null &&
currentNode.htNextNodes.ContainsKey(word[i]))
{
Node nextNode = (Node)currentNode.htNextNodes[word[i]];
if (probability == -1)
{
probability = nextNode.probabilityWordEnding;
}
else
{
probability += nextNode.probabilityWordEnding;
}
currentNode = nextNode;
}
else
{
probability = -1;
break;
}
}
return probability;
}
}
}
Here is the output from the code above for the sample input:
Scenario #1:
i (debugging: overall prefix probability = 8)
id (debugging: overall prefix probability = 16)
hel (debugging: overall prefix probability = 21)
hell (debugging: overall prefix probability = 28)
hello (debugging: overall prefix probability = 32)
i (debugging: overall prefix probability = 8)
id (debugging: overall prefix probability = 16)
ide (debugging: overall prefix probability = 24)
idea (debugging: overall prefix probability = 32)
Scenario #2:
p (debugging: overall prefix probability = 4)
pr (debugging: overall prefix probability = 8)
pro (debugging: overall prefix probability = 12)
prog (debugging: overall prefix probability = 16)
progr (debugging: overall prefix probability = 20)
progra (debugging: overall prefix probability = 24)
program (debugging: overall prefix probability = 28)
n (debugging: overall prefix probability = 14)
ne (debugging: overall prefix probability = 28)
new (debugging: overall prefix probability = 42)
g (debugging: overall prefix probability = 13)
in (debugging: overall prefix probability = 12)
int (debugging: overall prefix probability = 18)
c (debugging: overall prefix probability = 6)
co (debugging: overall prefix probability = 12)
con (debugging: overall prefix probability = 18)
cont (debugging: overall prefix probability = 24)
anoth (debugging: overall prefix probability = 25)
anothe (debugging: overall prefix probability = 30)
another (debugging: overall prefix probability = 35)
p (debugging: overall prefix probability = 4)
pr (debugging: overall prefix probability = 8)
MANUALLY
MANUALLY
Program.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace T9
{
class Program
{
static void Main(string[] args)
{
if (args.Length == 0)
{
Usage();
return;
}
FileInfo fi = new FileInfo(args[0]);
StreamReader sr = fi.OpenText();
int numberOfScenarios = Convert.ToInt32(sr.ReadLine());
for (int iScenario = 0; iScenario < numberOfScenarios; iScenario++)
{
WordsDictionaryTrie wordsTrie = new WordsDictionaryTrie();
int numberOfWordsInDictionary = Convert.ToInt32(sr.ReadLine());
for (int iWord = 0; iWord < numberOfWordsInDictionary; iWord++)
{
string[] lineParts = sr.ReadLine().Split(' ');
string word = lineParts[0];
int probability = Convert.ToInt32(lineParts[1]);
wordsTrie.AddWord(word, probability);
}
Console.WriteLine("Scenario #{0}:", iScenario+1);
int numberOfSequencesOfDigits = Convert.ToInt32(sr.ReadLine());
for (int iSequence = 0; iSequence < numberOfSequencesOfDigits; iSequence++)
{
string sequenceOfDigits = sr.ReadLine();
wordsTrie.PrintT9Words(sequenceOfDigits);
Console.WriteLine();
}
}
sr.Close();
}
static void Usage()
{
Console.WriteLine("Usage: T9.exe <input file>");
Console.WriteLine("For more info visit http://acm.tju.edu.cn/toj/showp1016.html");
}
}
}
WordsDictionaryTrie.cs:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
namespace T9
{
class WordsDictionaryTrie
{
class Node
{
public char letter;
public int probabilityWordEnding;
public Hashtable htNextNodes;
private Node() { }
public Node(char letter,
int probabilityWordEnding)
{
this.letter = letter;
this.probabilityWordEnding = probabilityWordEnding;
this.htNextNodes = null;
}
}
private Node root;
private Hashtable wordsAlreadyPrinted;
public WordsDictionaryTrie()
{
this.root = new Node('*',
0);
this.wordsAlreadyPrinted = new Hashtable();
}
public void AddWord(string word,
int probability)
{
_AddWord(this.root,
word,
0,
probability);
}
private void _AddWord(Node currentNode,
string word,
int currentPosition,
int probability)
{
if (currentNode == null ||
currentPosition >= word.Length ||
word == null ||
word.Length == 0)
{
return;
}
if (currentNode.htNextNodes == null)
{
currentNode.htNextNodes = new Hashtable();
}
if (!currentNode.htNextNodes.ContainsKey(word[currentPosition]))
{
Node newNode = new Node(word[currentPosition],
0);
currentNode.htNextNodes.Add(word[currentPosition],
newNode);
}
currentNode = (Node)currentNode.htNextNodes[word[currentPosition]];
currentNode.probabilityWordEnding += probability;
_AddWord(currentNode,
word,
currentPosition + 1,
probability);
}
public void PrintT9Words(string sequenceOfDigits)
{
if (sequenceOfDigits == null)
{
return;
}
Hashtable htNumberKeys = new Hashtable();
htNumberKeys.Add('2', "abc");
htNumberKeys.Add('3', "def");
htNumberKeys.Add('4', "ghi");
htNumberKeys.Add('5', "jkl");
htNumberKeys.Add('6', "mno");
htNumberKeys.Add('7', "pqrs");
htNumberKeys.Add('8', "tuv");
htNumberKeys.Add('9', "wxyz");
bool prefixDoesNotExist = false;
for (int i = 0; i < sequenceOfDigits.Length - 1; i++)
{
string prefixedSequence = sequenceOfDigits.Substring(0, i + 1);
string bestWord = "";
int bestProbability = Int32.MinValue;
if (!prefixDoesNotExist)
{
_GetWordWithBestProbability(ref htNumberKeys,
prefixedSequence,
0,
"",
ref bestWord,
ref bestProbability);
}
if (bestWord == "")
{
Console.WriteLine("MANUALLY");
prefixDoesNotExist = true;
}
else
{
Console.WriteLine("{0} (debugging: overall prefix probability = {1})", bestWord, bestProbability);
}
}
}
private string _CompareStringsAlphabeticalOrder(string s1,
string s2)
{
if (s1 == null ||
s2 == null)
{
return null;
}
int minLen = Math.Min(s1.Length,
s2.Length);
for (int i = 0; i < minLen; i++)
{
if (s1[i] < s2[i])
{
return s1;
}
if (s2[i] < s1[i])
{
return s2;
}
}
if (s1.Length < s2.Length)
{
return s1;
}
else
{
return s2;
}
}
private void _GetWordWithBestProbability(ref Hashtable htNumberKeys,
string sequenceOfDigits,
int currentPosition,
string currentWord,
ref string bestWord,
ref int bestProbability)
{
if (currentPosition >= sequenceOfDigits.Length)
{
int currentProbability = _GetWordProbability(currentWord);
if (currentProbability != -1 &&
currentProbability > bestProbability)
{
bestProbability = currentProbability;
bestWord = currentWord;
}
else if (currentProbability == bestProbability)
{
bestWord = _CompareStringsAlphabeticalOrder(bestWord, currentWord);
}
return;
}
if (htNumberKeys.ContainsKey(sequenceOfDigits[currentPosition]))
{
string mappedLetters = (string)htNumberKeys[sequenceOfDigits[currentPosition]];
foreach (char mappedLetter in mappedLetters)
{
_GetWordWithBestProbability(ref htNumberKeys,
sequenceOfDigits,
currentPosition + 1,
currentWord + mappedLetter.ToString(),
ref bestWord,
ref bestProbability);
}
}
}
private int _GetWordProbability(string word)
{
int probability = -1;
if (word == null)
{
return probability;
}
Node currentNode = this.root;
for (int i = 0; i < word.Length; i++)
{
if (currentNode.htNextNodes != null &&
currentNode.htNextNodes.ContainsKey(word[i]))
{
Node nextNode = (Node)currentNode.htNextNodes[word[i]];
if (probability == -1)
{
probability = nextNode.probabilityWordEnding;
}
else
{
probability += nextNode.probabilityWordEnding;
}
currentNode = nextNode;
}
else
{
probability = -1;
break;
}
}
return probability;
}
}
}
Here is the output from the code above for the sample input:
Scenario #1:
i (debugging: overall prefix probability = 8)
id (debugging: overall prefix probability = 16)
hel (debugging: overall prefix probability = 21)
hell (debugging: overall prefix probability = 28)
hello (debugging: overall prefix probability = 32)
i (debugging: overall prefix probability = 8)
id (debugging: overall prefix probability = 16)
ide (debugging: overall prefix probability = 24)
idea (debugging: overall prefix probability = 32)
Scenario #2:
p (debugging: overall prefix probability = 4)
pr (debugging: overall prefix probability = 8)
pro (debugging: overall prefix probability = 12)
prog (debugging: overall prefix probability = 16)
progr (debugging: overall prefix probability = 20)
progra (debugging: overall prefix probability = 24)
program (debugging: overall prefix probability = 28)
n (debugging: overall prefix probability = 14)
ne (debugging: overall prefix probability = 28)
new (debugging: overall prefix probability = 42)
g (debugging: overall prefix probability = 13)
in (debugging: overall prefix probability = 12)
int (debugging: overall prefix probability = 18)
c (debugging: overall prefix probability = 6)
co (debugging: overall prefix probability = 12)
con (debugging: overall prefix probability = 18)
cont (debugging: overall prefix probability = 24)
anoth (debugging: overall prefix probability = 25)
anothe (debugging: overall prefix probability = 30)
another (debugging: overall prefix probability = 35)
p (debugging: overall prefix probability = 4)
pr (debugging: overall prefix probability = 8)
MANUALLY
MANUALLY
Comments
Post a Comment