418 lines
18 KiB
C#
418 lines
18 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
using System.IO;
|
|
using MarketData.DataAccess;
|
|
using MarketData.Utils;
|
|
using MarketData.MarketDataModel;
|
|
|
|
namespace MarketData.Generator
|
|
{
|
|
public class SentimentGenerator
|
|
{
|
|
// Negative-sentiment lexicon, keyed by word; assigned once in the constructor.
private readonly LexicalDictionary negativeLexicon;
// Positive-sentiment lexicon, keyed by word; assigned once in the constructor.
private readonly LexicalDictionary positiveLexicon;
// Master word dictionary used for part-of-speech lookups; assigned once in the constructor.
private readonly WordDictionary wordDictionary;
// The single shared instance, created on the first call to GetInstance().
private static SentimentGenerator sentimentGeneratorInstance = null;
// Private gate for instance creation. A dedicated object is used instead of
// typeof(SentimentGenerator) because a Type object is reachable by any other code,
// which could take the same lock and block or deadlock this class.
private static readonly object instanceLock = new object();

/// <summary>
/// Loads the negative lexicon, the positive lexicon and the word dictionary through the
/// data-access classes. Private: instances are obtained through <see cref="GetInstance"/>.
/// </summary>
private SentimentGenerator()
{
    // One-off bootstrap steps, intentionally left disabled:
    //   InitializeLexicalDatabase();   // Initialize the lexical sentiment database
    //   UpdatePartsOfSpeech();         // and update the parts of speech
    //   InitializeWordDatabase();      // Initialize the master word dictionary
    negativeLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
    positiveLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);
    wordDictionary = DictionaryDA.GetDictionaryCollection().ToDictionary();
}

/// <summary>
/// Returns the shared <see cref="SentimentGenerator"/>, constructing it on first use.
/// </summary>
/// <returns>The single instance of this class.</returns>
/// <remarks>
/// Creation is serialized by a private lock. If the constructor throws, no instance is
/// stored and the next call tries again.
/// </remarks>
public static SentimentGenerator GetInstance()
{
    lock (instanceLock)
    {
        if (null == sentimentGeneratorInstance)
        {
            sentimentGeneratorInstance = new SentimentGenerator();
        }
        return sentimentGeneratorInstance;
    }
}
|
|
// ********************************************************************************************************************************************************************************************
|
|
// ******************************************************************************* B O O T S T R A P P I N G **********************************************************************************
|
|
// ********************************************************************************************************************************************************************************************
|
|
/// <summary>
/// Bootstrap step: reloads the master word dictionary from the verb, adjective, adverb
/// and noun word-list files.
/// </summary>
/// <remarks>
/// All four files are read before DictionaryDA.TruncateDictionary() is called, so a file
/// that fails to load (LoadFile returns null) raises a NullReferenceException before
/// anything is truncated. Each word list is then inserted as its own collection.
/// </remarks>
private static void InitializeWordDatabase()
{
    List<String> verbList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\verbs.txt").Values);
    List<String> adjectiveList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\adjectives.txt").Values);
    List<String> adverbList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\adverbs.txt").Values);
    List<String> nounList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\nouns.txt").Values);

    DictionaryDA.TruncateDictionary();

    // Parallel arrays: wordLists[i] is stored under partsOfSpeech[i], in the original insert order.
    List<String>[] wordLists = { verbList, adjectiveList, adverbList, nounList };
    String[] partsOfSpeech = { LexicalElement.VERB, LexicalElement.ADJECTIVE, LexicalElement.ADVERB, LexicalElement.NOUN };

    for (int listIndex = 0; listIndex < wordLists.Length; listIndex++)
    {
        DictionaryCollection batch = new DictionaryCollection();
        foreach (String entry in wordLists[listIndex])
        {
            batch.Add(new DictionaryElement(entry, partsOfSpeech[listIndex]));
        }
        DictionaryDA.InsertDictionaryCollection(batch);
    }
}
|
|
/// <summary>
/// Bootstrap step: reloads the sentiment lexicon from the negative and positive
/// lexicon files.
/// </summary>
/// <remarks>
/// Both files are read and converted to lexical elements before LexicalDA.TruncateLexicon()
/// is called; the negative collection is inserted first, then the positive one.
/// A file that fails to load (LoadFile returns null) raises a NullReferenceException
/// before anything is truncated.
/// </remarks>
private static void InitializeLexicalDatabase()
{
    Dictionary<String, String> negativeWords = LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\negativelexicon.txt");
    Dictionary<String, String> positiveWords = LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\positivelexicon.txt");

    LexicalCollection negativeElements = new LexicalCollection();
    foreach (String entry in negativeWords.Values)
    {
        negativeElements.Add(new LexicalElement(entry, LexicalElement.NEGATIVE_SENTIMENT));
    }

    LexicalCollection positiveElements = new LexicalCollection();
    foreach (String entry in positiveWords.Values)
    {
        positiveElements.Add(new LexicalElement(entry, LexicalElement.POSITIVE_SENTIMENT));
    }

    LexicalDA.TruncateLexicon();
    LexicalDA.InsertLexicalCollection(negativeElements);
    LexicalDA.InsertLexicalCollection(positiveElements);
}
|
|
/// <summary>
/// Bootstrap step: tags the stored lexicon entries with a part of speech taken from the
/// verb, adjective, adverb and noun word-list files, then writes the entries back.
/// </summary>
/// <remarks>
/// Lists are applied in the order verb, adjective, adverb, noun. ApplyList only fills in
/// entries whose part of speech is still null, so the first list containing a word wins.
/// </remarks>
private static void UpdatePartsOfSpeech()
{
    List<String> verbList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\verbs.txt").Values);
    List<String> adjectiveList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\adjectives.txt").Values);
    List<String> adverbList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\adverbs.txt").Values);
    List<String> nounList = new List<String>(LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\nouns.txt").Values);

    LexicalDictionary negativeLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
    LexicalDictionary positiveLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);

    // Parallel arrays: wordLists[i] is applied with partsOfSpeech[i].
    List<String>[] wordLists = { verbList, adjectiveList, adverbList, nounList };
    String[] partsOfSpeech = { LexicalElement.VERB, LexicalElement.ADJECTIVE, LexicalElement.ADVERB, LexicalElement.NOUN };
    for (int listIndex = 0; listIndex < wordLists.Length; listIndex++)
    {
        ApplyList(wordLists[listIndex], negativeLexicon, positiveLexicon, partsOfSpeech[listIndex]);
    }

    LexicalDA.UpdateLexicalElements(negativeLexicon.ToList());
    LexicalDA.UpdateLexicalElements(positiveLexicon.ToList());
}
|
|
/// <summary>
/// Assigns <paramref name="partOfSpeech"/> to every lexicon entry whose key appears in
/// <paramref name="list"/> and whose part of speech has not been set yet.
/// </summary>
/// <param name="list">Words that belong to the given part of speech.</param>
/// <param name="negativeItems">Negative lexicon, keyed by word; entries are updated in place.</param>
/// <param name="positiveItems">Positive lexicon, keyed by word; entries are updated in place.</param>
/// <param name="partOfSpeech">Part-of-speech tag to assign.</param>
private static void ApplyList(List<String> list,Dictionary<String,LexicalElement> negativeItems,Dictionary<String,LexicalElement> positiveItems,String partOfSpeech)
{
    foreach (String word in list)
    {
        TagIfUntagged(negativeItems, word, partOfSpeech);
        TagIfUntagged(positiveItems, word, partOfSpeech);
    }
}

// Sets the part of speech on lexicon[word] when that entry exists and is still untagged.
private static void TagIfUntagged(Dictionary<String,LexicalElement> lexicon, String word, String partOfSpeech)
{
    LexicalElement entry;
    if (!lexicon.TryGetValue(word, out entry))
    {
        return;
    }
    if (null == entry.PartOfSpeech)
    {
        entry.PartOfSpeech = partOfSpeech;
    }
}
|
|
/// <summary>
/// Reads a text file and returns its distinct, non-empty lines.
/// </summary>
/// <param name="pathFileName">Path of the file to read.</param>
/// <returns>
/// A dictionary in which every distinct non-empty line is both key and value, or
/// <c>null</c> when the file cannot be opened or read.
/// </returns>
/// <remarks>
/// Failures are swallowed and reported only through the null return value, so callers
/// must check for null before dereferencing the result.
/// </remarks>
private static Dictionary<String,String> LoadFile(String pathFileName)
{
    try
    {
        Dictionary<String, String> distinctLines = new Dictionary<String, String>();
        using (StreamReader reader = File.OpenText(pathFileName))
        {
            for (String line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                // Skip blank lines and lines that were already recorded.
                if (line.Length == 0 || distinctLines.ContainsKey(line))
                {
                    continue;
                }
                distinctLines.Add(line, line);
            }
        }
        return distinctLines;
    }
    catch (Exception)
    {
        return null;
    }
}
|
|
// ******************************************************************************************************************************************************************************************
|
|
// ******************************************************************************************************************************************************************************************
|
|
// ******************************************************************************************************************************************************************************************
|
|
/// <summary>
/// Scores a single headline and pairs it with the resulting sentiment.
/// </summary>
/// <param name="headline">Headline whose Entry text is analysed.</param>
/// <returns>A HeadlineSentiment built from the headline and its sentiment label.</returns>
public HeadlineSentiment ProcessHeadline(Headline headline)
{
    lock (this)
    {
        return new HeadlineSentiment(headline, ProcessHeadlineEntry(headline.Entry));
    }
}
|
|
/// <summary>
/// Classifies headline text as positive, negative or neutral.
/// </summary>
/// <param name="headline">Raw headline text; split into tokens on spaces and hyphens.</param>
/// <returns>
/// LexicalElement.POSITIVE_SENTIMENT when the positive score is higher,
/// LexicalElement.NEGATIVE_SENTIMENT when the negative score is higher,
/// otherwise LexicalElement.NEUTRAL_SENTIMENT.
/// </returns>
/// <remarks>
/// Tokens are visited from last to first. Each accepted token is lower-cased and stripped
/// of punctuation, then adds the current position rank to the positive or negative score
/// when it is found in the matching lexicon. The position rank grows by one for every
/// distinct token that reaches the end of the loop body.
/// NOTE(review): neighbouring tokens (the preceding word, and the words handed to
/// FindNextNouns/FindNextVerbs) are looked up without the full cleaning pipeline —
/// confirm that this is intended.
/// </remarks>
public String ProcessHeadlineEntry(String headline)
{
    lock (this)
    {
        HashSet<String> countedWords = new HashSet<String>();
        String[] tokens = headline.Split(new char[] { ' ', '-' });
        double positionRank = 1;
        double positiveRank = 0.00;
        double negativeRank = 0.00;

        for (int index = tokens.Length - 1; index >= 0; index--)
        {
            String rawToken = tokens[index];

            // Normalisation pipeline; a token rejected at any stage is skipped entirely.
            String cleanWord = CleanASCII(rawToken.Trim().ToLower());
            if (!Keep(cleanWord))
            {
                continue;
            }
            cleanWord = CleanStartsWithEndsWith(Clean(cleanWord));
            if (!KeepStartsWithEndsWith(cleanWord))
            {
                continue;
            }
            if (IsNumber(rawToken))     // the numeric test is made against the raw token
            {
                continue;
            }
            cleanWord = cleanWord.Trim();
            if (0 == cleanWord.Length)
            {
                continue;
            }

            bool isNegative = IsNegativeWord(cleanWord);
            bool isPositive = IsPositiveWord(cleanWord);
            bool isVerb = IsPartOfSpeech(cleanWord, LexicalElement.VERB);
            bool isNoun = IsPartOfSpeech(cleanWord, LexicalElement.NOUN);
            bool isAdjective = IsPartOfSpeech(cleanWord, LexicalElement.ADJECTIVE);
            bool isFirstToken = (index == 0);

            if (isNegative)
            {
                negativeRank += positionRank;
                if (isAdjective && isFirstToken)
                {
                    // It's describing the next word so don't adjust anything.
                    negativeRank -= positionRank;
                }
                else if (isVerb && !isFirstToken)
                {
                    // A verb preceded by an adjective: score again with the decremented
                    // rank and consume the preceding token as well.
                    String preceedingWord = tokens[index - 1].Trim().ToLower();
                    if (IsPartOfSpeech(preceedingWord, LexicalElement.ADJECTIVE))
                    {
                        positionRank -= 1;
                        negativeRank += positionRank;
                        index--;
                        continue;
                    }
                    if (isNoun)
                    {
                        // The word is also a noun: every positive noun after it adds one
                        // to the negative score.
                        foreach (String followingNoun in FindNextNouns(tokens, index + 1))
                        {
                            if (IsPositiveWord(followingNoun))
                            {
                                negativeRank++;
                            }
                        }
                    }
                }
                else if (isNoun && !isFirstToken)
                {
                    // A negative noun: every positive verb after it adds one to the
                    // negative score. The token is not recorded and the rank does not advance.
                    foreach (String followingVerb in FindNextVerbs(tokens, index + 1))
                    {
                        if (IsPositiveWord(followingVerb))
                        {
                            negativeRank++;
                        }
                    }
                    continue;
                }
            }
            else if (isPositive)
            {
                positiveRank += positionRank;
                if (isAdjective && isFirstToken)
                {
                    // It's describing the next word so don't adjust anything.
                    positiveRank -= positionRank;
                }
                else if (isVerb && !isFirstToken)
                {
                    // A verb preceded by an adjective: score again with the decremented
                    // rank and consume the preceding token as well.
                    String preceedingWord = tokens[index - 1].Trim().ToLower();
                    if (IsPartOfSpeech(preceedingWord, LexicalElement.ADJECTIVE))
                    {
                        positionRank -= 1;
                        positiveRank += positionRank;
                        index--;
                        continue;
                    }
                }
            }

            // Only the first occurrence of a word advances the position rank.
            if (!countedWords.Add(cleanWord))
            {
                continue;
            }
            positionRank++;
        }

        if (positiveRank > negativeRank)
        {
            return LexicalElement.POSITIVE_SENTIMENT;
        }
        if (negativeRank > positiveRank)
        {
            return LexicalElement.NEGATIVE_SENTIMENT;
        }
        return LexicalElement.NEUTRAL_SENTIMENT;
    }
}
|
|
/// <summary>
/// Collects every word from <paramref name="startingIndex"/> to the end of
/// <paramref name="words"/> that IsPartOfSpeech recognises as a verb.
/// </summary>
/// <param name="words">Headline tokens, examined exactly as given.</param>
/// <param name="startingIndex">Index of the first token to examine.</param>
/// <returns>The matching tokens in their original order; empty when there are none.</returns>
private List<String> FindNextVerbs(String[] words, int startingIndex)
{
    List<String> matches = new List<String>();
    int position = startingIndex;
    while (position < words.Length)
    {
        String candidate = words[position];
        if (IsPartOfSpeech(candidate, LexicalElement.VERB))
        {
            matches.Add(candidate);
        }
        position++;
    }
    return matches;
}
|
|
/// <summary>
/// Collects every word from <paramref name="startingIndex"/> to the end of
/// <paramref name="words"/> that IsPartOfSpeech recognises as a noun.
/// </summary>
/// <param name="words">Headline tokens, examined exactly as given.</param>
/// <param name="startingIndex">Index of the first token to examine.</param>
/// <returns>The matching tokens in their original order; empty when there are none.</returns>
private List<String> FindNextNouns(String[] words, int startingIndex)
{
    List<String> nouns = new List<String>();
    int position = startingIndex;
    while (position < words.Length)
    {
        String candidate = words[position];
        if (IsPartOfSpeech(candidate, LexicalElement.NOUN))
        {
            nouns.Add(candidate);
        }
        position++;
    }
    return nouns;
}
|
|
/// <summary>
/// Diagnostic helper: writes one console line per accepted headline token showing the
/// cleaned word, its part of speech and its sentiment.
/// </summary>
/// <param name="headline">Raw headline text; split into tokens on spaces and hyphens.</param>
/// <remarks>
/// Tokens are visited from last to first and pass through the same cleaning and filtering
/// steps as ProcessHeadlineEntry. The part of speech comes from the sentiment lexicon when
/// the word is positive or negative, otherwise from the word dictionary, and is printed as
/// "?" when neither knows the word. The dictionary lookup uses the cleaned word, so that
/// the printed part of speech belongs to the printed word.
/// </remarks>
public void PrintHeadlineEntry(String headline)
{
    lock (this)
    {
        String[] words = headline.Split(new char[] { ' ', '-' });

        for (int index = words.Length - 1; index >= 0; index--)
        {
            String word = words[index];
            String cleanWord = word.Trim().ToLower();

            cleanWord = CleanASCII(cleanWord);
            if (!Keep(cleanWord)) continue;
            cleanWord = Clean(cleanWord);
            cleanWord = CleanStartsWithEndsWith(cleanWord);
            if (!KeepStartsWithEndsWith(cleanWord)) continue;
            if (IsNumber(word)) continue;      // the numeric test is made against the raw token
            cleanWord = cleanWord.Trim();
            if (0 == cleanWord.Length) continue;

            bool isNegative = IsNegativeWord(cleanWord);
            bool isPositive = IsPositiveWord(cleanWord);

            String partOfSpeech = null;
            if (isNegative || isPositive) partOfSpeech = GetPartOfSpeech(cleanWord);
            // Fall back to the master dictionary, keyed by the cleaned word.
            if (null == partOfSpeech) partOfSpeech = GetDictionaryPartOfSpeech(cleanWord);
            if (null == partOfSpeech) partOfSpeech = "?";

            Console.WriteLine(String.Format("Word '{0}':{1}:{2}", cleanWord, partOfSpeech, isPositive ? "Positive" : isNegative ? "Negative" : "Neutral"));
        } // for each word in headline
    }
}
|
|
/// <summary>
/// Scores every headline in <paramref name="headlines"/>.
/// </summary>
/// <param name="headlines">Headlines to analyse, processed in enumeration order.</param>
/// <returns>A HeadlinesSentiment holding one result per input headline, in the same order.</returns>
public HeadlinesSentiment ProcessHeadlines(Headlines headlines)
{
    lock (this)
    {
        HeadlinesSentiment results = new HeadlinesSentiment();
        foreach (Headline current in headlines)
        {
            results.Add(ProcessHeadline(current));
        }
        return results;
    }
}
|
|
/// <summary>Returns true when <paramref name="word"/> is a key of the negative lexicon.</summary>
private bool IsNegativeWord(String word)
{
    return negativeLexicon.ContainsKey(word);
}
|
|
/// <summary>Returns true when <paramref name="word"/> is a key of the positive lexicon.</summary>
private bool IsPositiveWord(String word)
{
    return positiveLexicon.ContainsKey(word);
}
|
|
/// <summary>
/// Tells whether <paramref name="word"/> is known under the given part of speech.
/// </summary>
/// <param name="word">Word to look up; null yields false.</param>
/// <param name="partOfSpeech">Part-of-speech tag to test for; null yields false.</param>
/// <returns>
/// True when the positive lexicon, the negative lexicon (tag compared ignoring case) or
/// the word dictionary lists the word under that part of speech. Sources are consulted in
/// that order and the first match wins.
/// </returns>
private bool IsPartOfSpeech(String word,String partOfSpeech)
{
    if (word == null || partOfSpeech == null)
    {
        return false;
    }

    return (positiveLexicon.ContainsKey(word) && partOfSpeech.Equals(positiveLexicon[word].PartOfSpeech, StringComparison.OrdinalIgnoreCase))
        || (negativeLexicon.ContainsKey(word) && partOfSpeech.Equals(negativeLexicon[word].PartOfSpeech, StringComparison.OrdinalIgnoreCase))
        || wordDictionary.ContainsKeyAs(word, partOfSpeech);
}
|
|
/// <summary>
/// Looks up the part of speech recorded for <paramref name="word"/> in the sentiment lexicons.
/// </summary>
/// <returns>
/// The tag from the positive lexicon when the word is listed there, otherwise the tag from
/// the negative lexicon, otherwise null. The stored tag itself may also be null.
/// </returns>
private String GetPartOfSpeech(String word)
{
    if (positiveLexicon.ContainsKey(word))
    {
        return positiveLexicon[word].PartOfSpeech;
    }
    return negativeLexicon.ContainsKey(word) ? negativeLexicon[word].PartOfSpeech : null;
}
|
|
/// <summary>
/// Looks up the part of speech of <paramref name="word"/> in the master word dictionary.
/// </summary>
/// <returns>
/// The first matching tag, tested in the order adjective, adverb, noun, verb,
/// conjunction; null when the word is not in the dictionary or matches none of them.
/// </returns>
private String GetDictionaryPartOfSpeech(String word)
{
    if (!wordDictionary.ContainsKey(word))
    {
        return null;
    }

    // The order matters: a word listed under several tags reports the earliest one.
    String[] lookupOrder =
    {
        LexicalElement.ADJECTIVE,
        LexicalElement.ADVERB,
        LexicalElement.NOUN,
        LexicalElement.VERB,
        LexicalElement.CONJUNCTION
    };
    foreach (String candidate in lookupOrder)
    {
        if (wordDictionary.ContainsKeyAs(word, candidate))
        {
            return candidate;
        }
    }
    return null;
}
|
|
/// <summary>
/// Decides whether a token should be analysed at all.
/// </summary>
/// <param name="word">Token to test.</param>
/// <returns>
/// False when the token contains any of / % ; $ &gt; + ? _ # &amp; — otherwise true.
/// </returns>
/// <remarks>Comma, colon and hyphen were on earlier versions of the list and are now allowed.</remarks>
private bool Keep(String word)
{
    String[] rejectedSymbols = { "/", "%", ";", "$", ">", "+", "?", "_", "#", "&" };
    return !rejectedSymbols.Any(symbol => word.Contains(symbol));
}
|
|
/// <summary>
/// Removes every character whose code is below 32 or above 127.
/// </summary>
/// <param name="s">Text to filter.</param>
/// <returns>The characters of <paramref name="s"/> with codes 32 through 127, in their original order.</returns>
private string CleanASCII(string s)
{
    char[] kept = new char[s.Length];
    int keptCount = 0;
    for (int position = 0; position < s.Length; position++)
    {
        char current = s[position];
        if (current >= 32 && current <= 127)
        {
            kept[keptCount++] = current;
        }
    }
    return new string(kept, 0, keptCount);
}
|
|
/// <summary>
/// Deletes every occurrence of the characters " ? ~ | : , from <paramref name="word"/>.
/// </summary>
/// <param name="word">Token to clean.</param>
/// <returns>The token with those characters removed.</returns>
/// <remarks>Parentheses were on an earlier version of the list and are now left in place.</remarks>
private String Clean(String word)
{
    String[] strippedTokens = { "\"", "?", "~", "|", ":", "," };
    return strippedTokens.Aggregate(word, (current, token) => current.Replace(token, null));
}
|
|
/// <summary>
/// Strips wrapping punctuation from the ends of a token.
/// </summary>
/// <param name="word">Token to trim.</param>
/// <returns>
/// The token after one pass over the characters ' ` [ ] ! in that order, removing one
/// leading and one trailing occurrence of each character as it is found.
/// </returns>
/// <remarks>
/// Exactly one trailing character is removed per match; the earlier Length-2 arithmetic
/// also cut off the last letter of the word ("great!" became "grea"). Comparisons are
/// ordinal so the result does not depend on the current culture.
/// </remarks>
private String CleanStartsWithEndsWith(String word)
{
    String[] wrappers = { "'", "`", "[", "]", "!" };
    foreach (String wrapper in wrappers)
    {
        if (word.StartsWith(wrapper, StringComparison.Ordinal))
        {
            word = word.Substring(1);
        }
        if (word.EndsWith(wrapper, StringComparison.Ordinal))
        {
            word = word.Substring(0, word.Length - 1);
        }
    }
    return word;
}
|
|
/// <summary>
/// Rejects tokens that begin or end with a period, a tilde or a decimal digit.
/// </summary>
/// <param name="word">Token to test.</param>
/// <returns>False when the token starts or ends with one of those characters, otherwise true.</returns>
private bool KeepStartsWithEndsWith(String word)
{
    String[] rejectedEdges = { ".", "~", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0" };
    return !rejectedEdges.Any(edge => word.StartsWith(edge) || word.EndsWith(edge));
}
|
|
/// <summary>
/// Tells whether <paramref name="word"/> parses as a double using the default
/// (current-culture) rules of double.TryParse.
/// </summary>
/// <param name="word">Text to test; null yields false.</param>
/// <returns>True when the text is a valid number, otherwise false.</returns>
private bool IsNumber(String word)
{
    double parsedValue;     // required by TryParse; the value itself is not used
    bool parsed = double.TryParse(word, out parsedValue);
    return parsed;
}
|
|
}
|
|
}
|