Files
marketdata/MarketDataLib/Generator/SentimentGenerator.cs
2024-02-22 14:52:53 -05:00

418 lines
18 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using MarketData.DataAccess;
using MarketData.Utils;
using MarketData.MarketDataModel;
namespace MarketData.Generator
{
public class SentimentGenerator
{
// Lexicon entries loaded with LexicalElement.NEGATIVE_SENTIMENT, looked up by word.
private LexicalDictionary negativeLexicon;
// Lexicon entries loaded with LexicalElement.POSITIVE_SENTIMENT, looked up by word.
private LexicalDictionary positiveLexicon;
// Master word dictionary used for part-of-speech lookups.
private WordDictionary wordDictionary;
// The single shared instance handed out by GetInstance().
private static SentimentGenerator sentimentGeneratorInstance;
/// <summary>
/// Loads the negative lexicon, the positive lexicon and the master word dictionary
/// through the data-access layer. Private: callers obtain the generator via GetInstance().
/// </summary>
private SentimentGenerator()
{
    // One-off bootstrapping steps, left disabled. Enable them to rebuild the tables from the source files:
    //   InitializeLexicalDatabase();  // Initialize the lexical sentiment database
    //   UpdatePartsOfSpeech();        // and update the parts of speech
    //   InitializeWordDatabase();     // Initialize the master word dictionary
    this.negativeLexicon=LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
    this.positiveLexicon=LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);
    this.wordDictionary=DictionaryDA.GetDictionaryCollection().ToDictionary();
}
// Private gate guarding lazy creation of the singleton. A dedicated object is used instead of
// typeof(SentimentGenerator) because a Type object is reachable by any code in the process,
// so unrelated code locking on it could block or deadlock GetInstance().
private static readonly object sentimentGeneratorInstanceGate=new object();
/// <summary>
/// Returns the shared SentimentGenerator, creating it on first use.
/// Creation is serialized so the constructor runs at most once.
/// </summary>
/// <returns>The single SentimentGenerator instance.</returns>
public static SentimentGenerator GetInstance()
{
    lock(sentimentGeneratorInstanceGate)
    {
        if(null==sentimentGeneratorInstance)sentimentGeneratorInstance=new SentimentGenerator();
        return sentimentGeneratorInstance;
    }
}
// ********************************************************************************************************************************************************************************************
// ******************************************************************************* B O O T S T R A P P I N G **********************************************************************************
// ********************************************************************************************************************************************************************************************
/// <summary>
/// Rebuilds the master word dictionary table: reads the four part-of-speech word lists,
/// truncates the table, then inserts one collection per part of speech.
/// </summary>
private static void InitializeWordDatabase()
{
    // Parallel tables: wordFiles[i] holds the words tagged with partsOfSpeech[i].
    String[] wordFiles=
    {
        @"C:\boneyard\marketdata\LexicalAnalysis\verbs.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\adjectives.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\adverbs.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\nouns.txt"
    };
    String[] partsOfSpeech={LexicalElement.VERB,LexicalElement.ADJECTIVE,LexicalElement.ADVERB,LexicalElement.NOUN};
    // Every file is read before the table is truncated. LoadFile returns null on failure,
    // so a missing file faults here while the existing table is still intact.
    List<List<String>> wordLists=new List<List<String>>();
    foreach(String wordFile in wordFiles)
    {
        wordLists.Add(LoadFile(wordFile).Values.ToList<String>());
    }
    DictionaryDA.TruncateDictionary();
    for(int listIndex=0;listIndex<wordLists.Count;listIndex++)
    {
        DictionaryCollection batch=new DictionaryCollection();
        foreach(String entry in wordLists[listIndex])
        {
            batch.Add(new DictionaryElement(entry,partsOfSpeech[listIndex]));
        }
        DictionaryDA.InsertDictionaryCollection(batch);
    }
}
/// <summary>
/// Rebuilds the sentiment lexicon table from the negative and positive lexicon files:
/// reads both files, truncates the table, then inserts the negative and positive entries.
/// </summary>
private static void InitializeLexicalDatabase()
{
    Dictionary<String,String> negativeWords=LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\negativelexicon.txt");
    Dictionary<String,String> positiveWords=LoadFile(@"C:\boneyard\marketdata\LexicalAnalysis\positivelexicon.txt");
    // Both collections are built before the truncate. LoadFile returns null on failure,
    // so a failed load faults here before any data is removed.
    LexicalCollection negativeElements=new LexicalCollection();
    foreach(String entry in negativeWords.Values)
    {
        negativeElements.Add(new LexicalElement(entry,LexicalElement.NEGATIVE_SENTIMENT));
    }
    LexicalCollection positiveElements=new LexicalCollection();
    foreach(String entry in positiveWords.Values)
    {
        positiveElements.Add(new LexicalElement(entry,LexicalElement.POSITIVE_SENTIMENT));
    }
    LexicalDA.TruncateLexicon();
    LexicalDA.InsertLexicalCollection(negativeElements);
    LexicalDA.InsertLexicalCollection(positiveElements);
}
/// <summary>
/// Tags stored lexicon entries with a part of speech: reads the four word lists, loads the
/// negative and positive lexicons from the database, applies each list in the order
/// verb, adjective, adverb, noun, and writes both lexicons back.
/// </summary>
private static void UpdatePartsOfSpeech()
{
    // Parallel tables: wordFiles[i] holds the words tagged with partsOfSpeech[i].
    String[] wordFiles=
    {
        @"C:\boneyard\marketdata\LexicalAnalysis\verbs.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\adjectives.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\adverbs.txt",
        @"C:\boneyard\marketdata\LexicalAnalysis\nouns.txt"
    };
    String[] partsOfSpeech={LexicalElement.VERB,LexicalElement.ADJECTIVE,LexicalElement.ADVERB,LexicalElement.NOUN};
    List<List<String>> wordLists=new List<List<String>>();
    foreach(String wordFile in wordFiles)
    {
        wordLists.Add(LoadFile(wordFile).Values.ToList<String>());
    }
    LexicalDictionary negativeEntries=LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
    LexicalDictionary positiveEntries=LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);
    // ApplyList only fills in a missing part of speech, so the first list containing a word wins.
    for(int listIndex=0;listIndex<wordLists.Count;listIndex++)
    {
        ApplyList(wordLists[listIndex],negativeEntries,positiveEntries,partsOfSpeech[listIndex]);
    }
    LexicalDA.UpdateLexicalElements(negativeEntries.ToList());
    LexicalDA.UpdateLexicalElements(positiveEntries.ToList());
}
/// <summary>
/// Assigns <paramref name="partOfSpeech"/> to every lexicon element whose key appears in
/// <paramref name="list"/> and whose PartOfSpeech is still null. Elements that already have
/// a part of speech are left unchanged.
/// </summary>
/// <param name="list">Words belonging to the given part of speech.</param>
/// <param name="negativeItems">Negative lexicon keyed by word.</param>
/// <param name="positiveItems">Positive lexicon keyed by word.</param>
/// <param name="partOfSpeech">Part-of-speech tag to assign.</param>
private static void ApplyList(List<String> list,Dictionary<String,LexicalElement> negativeItems,Dictionary<String,LexicalElement> positiveItems,String partOfSpeech)
{
    foreach(String candidate in list)
    {
        LexicalElement match;
        // A word may be in either lexicon or both, so each is checked independently.
        if(negativeItems.TryGetValue(candidate,out match)&&match.PartOfSpeech==null)
        {
            match.PartOfSpeech=partOfSpeech;
        }
        if(positiveItems.TryGetValue(candidate,out match)&&match.PartOfSpeech==null)
        {
            match.PartOfSpeech=partOfSpeech;
        }
    }
}
/// <summary>
/// Reads a text file line by line into a dictionary mapping each distinct non-empty line to itself.
/// </summary>
/// <param name="pathFileName">Path of the file to read.</param>
/// <returns>
/// The distinct non-empty lines, or null if opening or reading the file throws.
/// Callers in this class dereference the result without a null check.
/// </returns>
private static Dictionary<String,String> LoadFile(String pathFileName)
{
    Dictionary<String,String> uniqueLines=new Dictionary<String,String>();
    StreamReader reader=null;
    try
    {
        reader=File.OpenText(pathFileName);
        for(String line=reader.ReadLine();line!=null;line=reader.ReadLine())
        {
            // Skip blank lines and lines already recorded.
            if(line.Length==0||uniqueLines.ContainsKey(line))continue;
            uniqueLines.Add(line,line);
        }
        return uniqueLines;
    }
    catch(Exception)
    {
        // Any failure is reported to the caller as a null result.
        return null;
    }
    finally
    {
        // Dispose also closes the reader.
        if(reader!=null)reader.Dispose();
    }
}
// ******************************************************************************************************************************************************************************************
// ******************************************************************************************************************************************************************************************
// ******************************************************************************************************************************************************************************************
/// <summary>
/// Scores the text of a single headline and pairs the headline with the resulting sentiment label.
/// </summary>
/// <param name="headline">Headline whose Entry text is scored.</param>
/// <returns>A HeadlineSentiment built from the headline and its sentiment label.</returns>
public HeadlineSentiment ProcessHeadline(Headline headline)
{
    lock(this)
    {
        return new HeadlineSentiment(headline,ProcessHeadlineEntry(headline.Entry));
    }
}
/// <summary>
/// Classifies a headline as positive, negative or neutral by accumulating a positive and a
/// negative rank over its words and comparing the two totals.
/// </summary>
/// <param name="headline">Headline text; it is split into tokens on spaces and hyphens.</param>
/// <returns>
/// LexicalElement.POSITIVE_SENTIMENT when the positive rank is larger,
/// LexicalElement.NEGATIVE_SENTIMENT when the negative rank is larger,
/// otherwise LexicalElement.NEUTRAL_SENTIMENT.
/// </returns>
public String ProcessHeadlineEntry(String headline)
{
lock(this)
{
// NOTE(review): sb is never used in this method.
StringBuilder sb=new StringBuilder();
// Cleaned words that already advanced positionRank (see the end of the loop body).
Dictionary<String,String> uniqueWords=new Dictionary<String,String>();
// String[] words=headline.Split(' ');
String[] words=headline.Split(new char[]{' ','-'});
// positionRank is the weight added for a sentiment word. Tokens are scanned from the LAST
// to the FIRST and positionRank grows as the scan proceeds, so words nearer the start of
// the headline receive a larger weight.
double positionRank=1;
double positiveRank=0.00;
double negativeRank=0.00;
for(int index=words.Length-1;index>=0;index--)
{
String word=words[index];
// Normalize the token: trim, lower-case, strip characters outside 32..127, then drop
// tokens that fail the Keep / KeepStartsWithEndsWith filters.
String cleanWord=word.Trim().ToLower();
cleanWord=CleanASCII(cleanWord);
if(!Keep(cleanWord))continue;
cleanWord=Clean(cleanWord);
cleanWord=CleanStartsWithEndsWith(cleanWord);
if(!KeepStartsWithEndsWith(cleanWord))continue;
// The numeric test is applied to the raw token, not the cleaned one.
if(IsNumber(word))continue;
cleanWord=cleanWord.Trim();
if(0==cleanWord.Length)continue;
bool isNegative=IsNegativeWord(cleanWord);
bool isPositive=IsPositiveWord(cleanWord);
bool isVerb=IsPartOfSpeech(cleanWord,LexicalElement.VERB);
bool isNoun=IsPartOfSpeech(cleanWord,LexicalElement.NOUN);
bool isAdjective=IsPartOfSpeech(cleanWord,LexicalElement.ADJECTIVE);
// Console.WriteLine(String.Format("Word '{0}':{1}:{2}:Pr{3}",cleanWord,LexicalElement.VERB,isPositive?"Positive":isNegative?"Negative":"Neutral",positionRank));
// A word found in both lexicons is treated as negative because that branch is tested first.
if(isNegative)
{
negativeRank+=positionRank;
if(isAdjective&&index==0) // it's describing the next word so don't adjust anything
{
// Undo the addition above: an adjective in the first token position contributes nothing.
negativeRank-=positionRank;
}
else if(isVerb&&index>0) // if the word is a verb check to see if previous word is an adjective thus strengthening it
{
// The preceding token is only trimmed and lower-cased here; it does not go through
// CleanASCII/Clean/CleanStartsWithEndsWith as the current word did.
String preceedingWord=words[index-1].Trim().ToLower();
if(IsPartOfSpeech(preceedingWord,LexicalElement.ADJECTIVE))
{
// Console.WriteLine(String.Format("Preceeding Word {0}:{1}",preceedingWord,LexicalElement.ADJECTIVE));
// positionRank is decremented first and the decremented value is added a second time.
// index-- makes the loop skip the adjective token; continue bypasses the
// uniqueWords bookkeeping and positionRank++ for this word.
negativeRank+=(--positionRank);
index--;
continue;
}
else if(isNoun) // if the word is also a noun, every positive noun that follows it adds 1 to the negative rank (doing negative things to something positive)
{
// FindNextNouns receives the raw (uncleaned) tokens after this one.
List<String> nextNouns = FindNextNouns(words, index + 1);
for (int nounIndex = 0; nounIndex < nextNouns.Count; nounIndex++)
{
// isNegative/isPositive are overwritten here; only isPositive is read, inside this loop.
isNegative = IsNegativeWord(nextNouns[nounIndex]);
isPositive = IsPositiveWord(nextNouns[nounIndex]);
if (isPositive) negativeRank++;
}
}
}
else if(isNoun&&index>0)
{
List<String> nextVerbs=FindNextVerbs(words,index+1); // if we have a negative noun then look for a following positive verb that would strengthen the negativity
for(int verbIndex=0;verbIndex<nextVerbs.Count;verbIndex++)
{
isNegative = IsNegativeWord(nextVerbs[verbIndex]);
isPositive = IsPositiveWord(nextVerbs[verbIndex]);
if(isPositive)negativeRank++;
}
// continue skips the uniqueWords bookkeeping, so positionRank does not advance for this word.
continue;
}
}
else if(isPositive)
{
// Same adjective/verb rules as the negative branch, applied to positiveRank.
// There is no noun handling on the positive side.
positiveRank+=positionRank;
if (isAdjective && index == 0) // it's describing the next word so don't adjust anything
{
positiveRank -= positionRank;
}
else if(isVerb&&index>0) // if the word is a verb check to see if previous word is an adjective thus strengthening it
{
String preceedingWord=words[index-1].Trim().ToLower();
if(IsPartOfSpeech(preceedingWord,LexicalElement.ADJECTIVE))
{
// Console.WriteLine(String.Format("Preceeding Word {0}:{1}",preceedingWord,LexicalElement.ADJECTIVE));
positiveRank+=(--positionRank);
index--;
continue;
}
}
}
// The ranks were already updated above, so a repeated sentiment word still contributes
// each time it occurs; the duplicate check only stops positionRank from advancing again.
if(uniqueWords.ContainsKey(cleanWord))continue;
uniqueWords.Add(cleanWord,cleanWord);
positionRank++;
} // for each word in headline
// Equal ranks (including both zero) leave the result neutral.
String headlineSentiment=LexicalElement.NEUTRAL_SENTIMENT;
if(positiveRank>negativeRank)headlineSentiment=LexicalElement.POSITIVE_SENTIMENT;
else if(negativeRank>positiveRank)headlineSentiment=LexicalElement.NEGATIVE_SENTIMENT;
return headlineSentiment;
}
}
/// <summary>
/// Collects, in order, every token from <paramref name="startingIndex"/> to the end of
/// <paramref name="words"/> that IsPartOfSpeech recognises as a verb.
/// </summary>
/// <param name="words">Headline tokens, passed to the lookup as-is.</param>
/// <param name="startingIndex">Index of the first token to examine.</param>
/// <returns>The matching tokens; empty when none match or the start is past the end.</returns>
private List<String> FindNextVerbs(String[] words, int startingIndex)
{
    List<String> found=new List<String>();
    int position=startingIndex;
    while(position<words.Length)
    {
        String candidate=words[position];
        if(IsPartOfSpeech(candidate,LexicalElement.VERB))
        {
            found.Add(candidate);
        }
        position++;
    }
    return found;
}
/// <summary>
/// Collects, in order, every token from <paramref name="startingIndex"/> to the end of
/// <paramref name="words"/> that IsPartOfSpeech recognises as a noun.
/// </summary>
/// <param name="words">Headline tokens, passed to the lookup as-is.</param>
/// <param name="startingIndex">Index of the first token to examine.</param>
/// <returns>The matching tokens; empty when none match or the start is past the end.</returns>
private List<String> FindNextNouns(String[] words, int startingIndex)
{
    List<String> nouns=new List<String>();
    int position=startingIndex;
    while(position<words.Length)
    {
        String candidate=words[position];
        if(IsPartOfSpeech(candidate,LexicalElement.NOUN))
        {
            nouns.Add(candidate);
        }
        position++;
    }
    return nouns;
}
/// <summary>
/// Diagnostic dump: writes one console line per retained headline word showing the cleaned
/// word, its part of speech ("?" when unknown) and whether it is Positive, Negative or Neutral.
/// Words are visited from the last token to the first, using the same cleaning and filtering
/// steps as ProcessHeadlineEntry.
/// </summary>
/// <param name="headline">Headline text; it is split into tokens on spaces and hyphens.</param>
public void PrintHeadlineEntry(String headline)
{
    lock (this)
    {
        String[] words = headline.Split(new char[] { ' ', '-' });
        for (int index = words.Length - 1; index >= 0; index--)
        {
            String word = words[index];
            String cleanWord = word.Trim().ToLower();
            cleanWord = CleanASCII(cleanWord);
            if (!Keep(cleanWord)) continue;
            cleanWord = Clean(cleanWord);
            cleanWord = CleanStartsWithEndsWith(cleanWord);
            if (!KeepStartsWithEndsWith(cleanWord)) continue;
            // The numeric test is applied to the raw token, as in ProcessHeadlineEntry.
            if (IsNumber(word)) continue;
            cleanWord = cleanWord.Trim();
            if (0 == cleanWord.Length) continue;
            bool isNegative = IsNegativeWord(cleanWord);
            bool isPositive = IsPositiveWord(cleanWord);
            // Prefer the part of speech stored in the sentiment lexicon, then fall back to the
            // master word dictionary. Both lookups use the cleaned word; the fallback previously
            // used the raw token, whose case and punctuation differ from the word being reported.
            String partOfSpeech = null;
            if (isNegative || isPositive) partOfSpeech = GetPartOfSpeech(cleanWord);
            if (null == partOfSpeech) partOfSpeech = GetDictionaryPartOfSpeech(cleanWord);
            if (null == partOfSpeech) partOfSpeech = "?";
            Console.WriteLine(String.Format("Word '{0}':{1}:{2}", cleanWord, partOfSpeech, isPositive ? "Positive" : isNegative ? "Negative" : "Neutral"));
        } // for each word in headline
    }
}
/// <summary>
/// Scores every headline in the collection, preserving enumeration order.
/// </summary>
/// <param name="headlines">Headlines to score.</param>
/// <returns>One HeadlineSentiment per input headline.</returns>
public HeadlinesSentiment ProcessHeadlines(Headlines headlines)
{
    lock(this)
    {
        HeadlinesSentiment results=new HeadlinesSentiment();
        foreach(Headline item in headlines)
        {
            results.Add(ProcessHeadline(item));
        }
        return results;
    }
}
/// <summary>Reports whether the word is a key in the negative lexicon.</summary>
/// <param name="word">Word to look up.</param>
/// <returns>true when the negative lexicon contains the word.</returns>
private bool IsNegativeWord(String word)
{
    return negativeLexicon.ContainsKey(word);
}
/// <summary>Reports whether the word is a key in the positive lexicon.</summary>
/// <param name="word">Word to look up.</param>
/// <returns>true when the positive lexicon contains the word.</returns>
private bool IsPositiveWord(String word)
{
    return positiveLexicon.ContainsKey(word);
}
/// <summary>
/// Reports whether the word carries the given part of speech according to the positive
/// lexicon, the negative lexicon or the master word dictionary, checked in that order.
/// Lexicon tags are compared case-insensitively (ordinal).
/// </summary>
/// <param name="word">Word to look up; null yields false.</param>
/// <param name="partOfSpeech">Part-of-speech tag to test for; null yields false.</param>
/// <returns>true when any of the three sources tags the word with the part of speech.</returns>
private bool IsPartOfSpeech(String word,String partOfSpeech)
{
    if(word==null||partOfSpeech==null)
    {
        return false;
    }
    bool taggedInPositive=positiveLexicon.ContainsKey(word)
        &&partOfSpeech.Equals(positiveLexicon[word].PartOfSpeech,StringComparison.OrdinalIgnoreCase);
    if(taggedInPositive)
    {
        return true;
    }
    bool taggedInNegative=negativeLexicon.ContainsKey(word)
        &&partOfSpeech.Equals(negativeLexicon[word].PartOfSpeech,StringComparison.OrdinalIgnoreCase);
    if(taggedInNegative)
    {
        return true;
    }
    // Neither lexicon matched, so the master word dictionary decides.
    return wordDictionary.ContainsKeyAs(word,partOfSpeech);
}
/// <summary>
/// Returns the part of speech stored for the word in the sentiment lexicons.
/// The positive lexicon takes precedence over the negative one.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <returns>The stored part of speech (which may itself be null), or null when the word is in neither lexicon.</returns>
private String GetPartOfSpeech(String word)
{
    LexicalDictionary source=null;
    if(positiveLexicon.ContainsKey(word))
    {
        source=positiveLexicon;
    }
    else if(negativeLexicon.ContainsKey(word))
    {
        source=negativeLexicon;
    }
    if(source==null)
    {
        return null;
    }
    return source[word].PartOfSpeech;
}
/// <summary>
/// Returns the first part of speech under which the master word dictionary lists the word,
/// testing adjective, adverb, noun, verb and conjunction in that order.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <returns>The matching part-of-speech constant, or null when the word is absent or matches none of them.</returns>
private String GetDictionaryPartOfSpeech(String word)
{
    if(!wordDictionary.ContainsKey(word))
    {
        return null;
    }
    // The order matters: a word listed under several parts of speech reports the first match.
    String[] lookupOrder=
    {
        LexicalElement.ADJECTIVE,
        LexicalElement.ADVERB,
        LexicalElement.NOUN,
        LexicalElement.VERB,
        LexicalElement.CONJUNCTION
    };
    foreach(String candidate in lookupOrder)
    {
        if(wordDictionary.ContainsKeyAs(word,candidate))
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// Decides whether a token is worth analysing: it is rejected when it contains any of the
/// listed symbols anywhere in its text.
/// </summary>
/// <param name="word">Token to test.</param>
/// <returns>false when the token contains a rejected symbol, otherwise true.</returns>
private bool Keep(String word)
{
    // Earlier revisions of this list also rejected ",", ":" and "-"; those are no longer included.
    String[] removeList={"/","%",";","$",">","+","?","_","#","&"};
    return !removeList.Any(symbol=>word.Contains(symbol));
}
/// <summary>
/// Returns the input with every character whose code is below 32 or above 127 removed.
/// </summary>
/// <param name="s">Text to filter.</param>
/// <returns>The characters of <paramref name="s"/> with codes 32 through 127, in their original order.</returns>
private string CleanASCII(string s)
{
    StringBuilder kept = new StringBuilder(s.Length);
    for(int position = 0; position < s.Length; position++)
    {
        char current = s[position];
        bool inRange = current >= 32 && current <= 127;
        if(inRange)
        {
            kept.Append(current);
        }
    }
    return kept.ToString();
}
/// <summary>
/// Removes every occurrence of the listed punctuation symbols from the word.
/// </summary>
/// <param name="word">Word to strip.</param>
/// <returns>The word without any double quote, '?', '~', '|', ':' or ',' characters.</returns>
private String Clean(String word)
{
    // An earlier revision of this list also stripped "(" and ")"; those are no longer included.
    String[] replaceList={"\"","?","~","|",":",","};
    return replaceList.Aggregate(word,(current,symbol)=>current.Replace(symbol,String.Empty));
}
/// <summary>
/// Strips one leading and one trailing occurrence of each listed symbol
/// (apostrophe, back-tick, '[', ']', '!') from the word. The symbols are processed once each,
/// in list order, so this is a single pass rather than a repeated trim.
/// </summary>
/// <param name="word">Word to strip.</param>
/// <returns>The word with matching leading and trailing symbols removed.</returns>
private String CleanStartsWithEndsWith(String word)
{
    String[] replaceList={"'","`","[","]","!"};
    foreach(String toReplace in replaceList)
    {
        // Ordinal comparison: these are punctuation symbols, not culture-sensitive text.
        if(word.StartsWith(toReplace,StringComparison.Ordinal))
        {
            word=word.Substring(toReplace.Length);
        }
        // Remove exactly the matched suffix. The previous code cut word.Length-2, which also
        // dropped the last real character of the word (for example "great!" became "grea").
        if(word.EndsWith(toReplace,StringComparison.Ordinal))
        {
            word=word.Substring(0,word.Length-toReplace.Length);
        }
    }
    return word;
}
/// <summary>
/// Rejects tokens that begin or end with a period, a tilde or a decimal digit.
/// </summary>
/// <param name="word">Token to test.</param>
/// <returns>false when the token starts or ends with one of the listed strings, otherwise true.</returns>
private bool KeepStartsWithEndsWith(String word)
{
    String[] rejectedEdges={".","~","1","2","3","4","5","6","7","8","9","0"};
    return !rejectedEdges.Any(edge=>word.StartsWith(edge)||word.EndsWith(edge));
}
/// <summary>
/// Reports whether the text parses as a double using double.TryParse with its default settings.
/// </summary>
/// <param name="word">Text to test.</param>
/// <returns>true when the text parses as a number, otherwise false.</returns>
private bool IsNumber(String word)
{
    double parsed;
    bool isNumeric=double.TryParse(word,out parsed);
    return isNumeric;
}
}
}