using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using MarketData.DataAccess;
using MarketData.Utils;
using MarketData.MarketDataModel;

namespace MarketData.Generator
{
    // Classifies headlines as positive, negative or neutral by looking each word up in a
    // positive lexicon, a negative lexicon and a part-of-speech word dictionary.
    // The class is a singleton: obtain it through GetInstance(). The public methods
    // serialize on a private lock.
    public class SentimentGenerator
    {
        // Source word lists used only by the bootstrapping routines below.
        private const String VerbsFile = @"C:\boneyard\marketdata\LexicalAnalysis\verbs.txt";
        private const String AdjectivesFile = @"C:\boneyard\marketdata\LexicalAnalysis\adjectives.txt";
        private const String AdverbsFile = @"C:\boneyard\marketdata\LexicalAnalysis\adverbs.txt";
        private const String NounsFile = @"C:\boneyard\marketdata\LexicalAnalysis\nouns.txt";
        private const String NegativeLexiconFile = @"C:\boneyard\marketdata\LexicalAnalysis\negativelexicon.txt";
        private const String PositiveLexiconFile = @"C:\boneyard\marketdata\LexicalAnalysis\positivelexicon.txt";

        // A headline is split on spaces and hyphens, so hyphenated compounds are scored part by part.
        private static readonly char[] WordSeparators = new char[] { ' ', '-' };
        // A word containing any of these is dropped entirely.
        private static readonly String[] RejectIfContains = { "/", "%", ";", "$", ">", "+", "?", "_", "#", "&" };
        // These are deleted wherever they occur inside a word.
        private static readonly String[] RemoveEverywhere = { "\"", "?", "~", "|", ":", "," };
        // These are stripped from the start and the end of a word.
        private static readonly String[] StripFromEnds = { "'", "`", "[", "]", "!" };
        // A word that starts or ends with any of these is dropped entirely.
        private static readonly String[] RejectIfAtEnds = { ".", "~", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0" };

        // Private lock objects: locking on 'this' or on typeof(...) would let unrelated code
        // contend on (or deadlock against) the same monitor.
        private static readonly object instanceLock = new object();
        private readonly object syncRoot = new object();

        private LexicalDictionary negativeLexicon = null;
        private LexicalDictionary positiveLexicon = null;
        private WordDictionary wordDictionary = null;
        private static SentimentGenerator sentimentGeneratorInstance = null;

        // Loads both lexicons and the word dictionary through the data-access layer.
        // The stores are rebuilt from the source word lists by running, in this order,
        // InitializeLexicalDatabase(), UpdatePartsOfSpeech() and InitializeWordDatabase();
        // those calls are intentionally not made here.
        private SentimentGenerator()
        {
            negativeLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
            positiveLexicon = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);
            wordDictionary = DictionaryDA.GetDictionaryCollection().ToDictionary();
        }

        // Returns the single shared instance, creating it on first use.
        public static SentimentGenerator GetInstance()
        {
            lock (instanceLock)
            {
                if (null == sentimentGeneratorInstance)
                {
                    sentimentGeneratorInstance = new SentimentGenerator();
                }
                return sentimentGeneratorInstance;
            }
        }

        // *********************************************************************************************
        // ********************************* B O O T S T R A P P I N G *********************************
        // *********************************************************************************************

        // Rebuilds the word dictionary from the four part-of-speech word lists.
        // All files are read before the dictionary is truncated, so a missing or unreadable
        // file aborts the rebuild without touching the existing data.
        private static void InitializeWordDatabase()
        {
            List<String> verbs = LoadWords(VerbsFile);
            List<String> adjectives = LoadWords(AdjectivesFile);
            List<String> adverbs = LoadWords(AdverbsFile);
            List<String> nouns = LoadWords(NounsFile);
            DictionaryDA.TruncateDictionary();
            InsertWords(verbs, LexicalElement.VERB);
            InsertWords(adjectives, LexicalElement.ADJECTIVE);
            InsertWords(adverbs, LexicalElement.ADVERB);
            InsertWords(nouns, LexicalElement.NOUN);
        }

        // Inserts every word of the list into the word dictionary under the given part of speech.
        private static void InsertWords(List<String> words, String partOfSpeech)
        {
            DictionaryCollection collection = new DictionaryCollection();
            foreach (String word in words)
            {
                collection.Add(new DictionaryElement(word, partOfSpeech));
            }
            DictionaryDA.InsertDictionaryCollection(collection);
        }

        // Rebuilds the sentiment lexicon from the negative and positive word lists.
        // Both files are read before the lexicon is truncated.
        private static void InitializeLexicalDatabase()
        {
            Dictionary<String, String> negativeWords = LoadFile(NegativeLexiconFile);
            Dictionary<String, String> positiveWords = LoadFile(PositiveLexiconFile);
            LexicalCollection positiveLexicalElements = new LexicalCollection();
            LexicalCollection negativeLexicalElements = new LexicalCollection();
            foreach (String s in negativeWords.Values)
            {
                negativeLexicalElements.Add(new LexicalElement(s, LexicalElement.NEGATIVE_SENTIMENT));
            }
            foreach (String s in positiveWords.Values)
            {
                positiveLexicalElements.Add(new LexicalElement(s, LexicalElement.POSITIVE_SENTIMENT));
            }
            LexicalDA.TruncateLexicon();
            LexicalDA.InsertLexicalCollection(negativeLexicalElements);
            LexicalDA.InsertLexicalCollection(positiveLexicalElements);
        }

        // Tags lexicon entries that have no part of speech yet, using the four word lists.
        // Lists are applied in the order verb, adjective, adverb, noun and the first match wins,
        // because an entry that already has a part of speech is left alone.
        private static void UpdatePartsOfSpeech()
        {
            List<String> verbs = LoadWords(VerbsFile);
            List<String> adjectives = LoadWords(AdjectivesFile);
            List<String> adverbs = LoadWords(AdverbsFile);
            List<String> nouns = LoadWords(NounsFile);
            LexicalDictionary negatives = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.NEGATIVE_SENTIMENT);
            LexicalDictionary positives = LexicalDA.GetLexicalCollection().ToDictionary(LexicalElement.POSITIVE_SENTIMENT);
            ApplyList(verbs, negatives, positives, LexicalElement.VERB);
            ApplyList(adjectives, negatives, positives, LexicalElement.ADJECTIVE);
            ApplyList(adverbs, negatives, positives, LexicalElement.ADVERB);
            ApplyList(nouns, negatives, positives, LexicalElement.NOUN);
            LexicalDA.UpdateLexicalElements(negatives.ToList());
            LexicalDA.UpdateLexicalElements(positives.ToList());
        }

        // For every word of the list found in either lexicon, sets the entry's part of speech
        // unless one is already assigned.
        private static void ApplyList(List<String> list, Dictionary<String, LexicalElement> negativeItems, Dictionary<String, LexicalElement> positiveItems, String partOfSpeech)
        {
            foreach (String item in list)
            {
                AssignPartOfSpeechIfMissing(negativeItems, item, partOfSpeech);
                AssignPartOfSpeechIfMissing(positiveItems, item, partOfSpeech);
            }
        }

        // Sets PartOfSpeech on the lexicon entry for 'item' when the entry exists and has none.
        private static void AssignPartOfSpeechIfMissing(Dictionary<String, LexicalElement> lexicon, String item, String partOfSpeech)
        {
            LexicalElement lexicalElement;
            if (lexicon.TryGetValue(item, out lexicalElement) && null == lexicalElement.PartOfSpeech)
            {
                lexicalElement.PartOfSpeech = partOfSpeech;
            }
        }

        // Reads a word-list file and returns its distinct non-empty lines as a list.
        private static List<String> LoadWords(String pathFileName)
        {
            return LoadFile(pathFileName).Values.ToList();
        }

        // Reads a text file and returns its distinct non-empty lines, each line keyed by itself.
        // I/O failures (missing file, access denied, ...) propagate to the caller. Previously
        // they were swallowed and null was returned, which every caller dereferenced at once.
        private static Dictionary<String, String> LoadFile(String pathFileName)
        {
            Dictionary<String, String> items = new Dictionary<String, String>();
            using (StreamReader streamReader = File.OpenText(pathFileName))
            {
                String strLine;
                while (null != (strLine = streamReader.ReadLine()))
                {
                    if (0 == strLine.Length) continue;
                    if (items.ContainsKey(strLine)) continue;
                    items.Add(strLine, strLine);
                }
            }
            return items;
        }

        // *********************************************************************************************
        // ************************************* S E N T I M E N T *************************************
        // *********************************************************************************************

        // Scores headline.Entry and wraps the result together with the headline.
        public HeadlineSentiment ProcessHeadline(Headline headline)
        {
            lock (syncRoot)
            {
                String sentiment = ProcessHeadlineEntry(headline.Entry);
                return new HeadlineSentiment(headline, sentiment);
            }
        }

        // Scores a headline and returns LexicalElement.POSITIVE_SENTIMENT, NEGATIVE_SENTIMENT or
        // NEUTRAL_SENTIMENT. A null headline is scored as neutral.
        //
        // The words are walked from the LAST to the FIRST. positionRank starts at 1 and grows by
        // one for every distinct word kept, so sentiment words nearer the start of the headline
        // carry more weight. Whichever of the positive and negative totals is larger decides the
        // result; a tie is neutral.
        public String ProcessHeadlineEntry(String headline)
        {
            lock (syncRoot)
            {
                if (null == headline) return LexicalElement.NEUTRAL_SENTIMENT;
                HashSet<String> uniqueWords = new HashSet<String>();
                String[] words = headline.Split(WordSeparators);
                double positionRank = 1;
                double positiveRank = 0.00;
                double negativeRank = 0.00;
                for (int index = words.Length - 1; index >= 0; index--)
                {
                    String cleanWord = NormalizeWord(words[index]);
                    if (null == cleanWord) continue;
                    bool isNegative = IsNegativeWord(cleanWord);
                    bool isPositive = IsPositiveWord(cleanWord);
                    bool isVerb = IsPartOfSpeech(cleanWord, LexicalElement.VERB);
                    bool isNoun = IsPartOfSpeech(cleanWord, LexicalElement.NOUN);
                    bool isAdjective = IsPartOfSpeech(cleanWord, LexicalElement.ADJECTIVE);
                    if (isNegative)
                    {
                        negativeRank += positionRank;
                        if (isAdjective && index == 0) // it's describing the next word so don't adjust anything
                        {
                            negativeRank -= positionRank;
                        }
                        else if (isVerb && index > 0) // a verb preceded by an adjective is strengthened
                        {
                            String preceedingWord = words[index - 1].Trim().ToLowerInvariant();
                            if (IsPartOfSpeech(preceedingWord, LexicalElement.ADJECTIVE))
                            {
                                // Count the verb a second time at the decremented rank and consume the
                                // adjective: together with the loop's own decrement, index-- skips it.
                                negativeRank += (--positionRank);
                                index--;
                                continue;
                            }
                            else if (isNoun) // also a noun: each positive noun that follows adds to the negativity
                            {
                                negativeRank += CountPositiveWords(FindNextNouns(words, index + 1));
                            }
                        }
                        else if (isNoun && index > 0)
                        {
                            // if we have a negative noun then look for a following positive verb that would strengthen the negativity
                            //
                            // NOTE(review): from this loop body through the condition of the positive
                            // branch's verb check below, the text was missing from the source as received
                            // (everything between "verbIndex<" and ">0)" had been stripped). It is
                            // reconstructed here by symmetry with the noun loop above and with the
                            // surviving tail of the positive branch. Confirm against version control.
                            negativeRank += CountPositiveWords(FindNextVerbs(words, index + 1));
                        }
                    }
                    else if (isPositive)
                    {
                        positiveRank += positionRank;
                        if (isVerb && index > 0) // a verb preceded by an adjective is strengthened
                        {
                            String preceedingWord = words[index - 1].Trim().ToLowerInvariant();
                            if (IsPartOfSpeech(preceedingWord, LexicalElement.ADJECTIVE))
                            {
                                positiveRank += (--positionRank);
                                index--;
                                continue;
                            }
                        }
                    }
                    // Only the first occurrence of a word advances the position rank.
                    if (!uniqueWords.Add(cleanWord)) continue;
                    positionRank++;
                } // for each word in headline
                String headlineSentiment = LexicalElement.NEUTRAL_SENTIMENT;
                if (positiveRank > negativeRank) headlineSentiment = LexicalElement.POSITIVE_SENTIMENT;
                else if (negativeRank > positiveRank) headlineSentiment = LexicalElement.NEGATIVE_SENTIMENT;
                return headlineSentiment;
            }
        }

        // Counts how many of the given words are in the positive lexicon.
        private int CountPositiveWords(List<String> candidates)
        {
            int count = 0;
            foreach (String candidate in candidates)
            {
                if (IsPositiveWord(candidate)) count++;
            }
            return count;
        }

        // Applies the cleaning pipeline shared by scoring and printing to one raw headline token.
        // Returns the cleaned lower-case word, or null when the token must be skipped.
        private String NormalizeWord(String word)
        {
            String cleanWord = CleanASCII(word.Trim().ToLowerInvariant());
            if (!Keep(cleanWord)) return null;
            cleanWord = Clean(cleanWord);
            cleanWord = CleanStartsWithEndsWith(cleanWord);
            if (!KeepStartsWithEndsWith(cleanWord)) return null;
            if (IsNumber(word)) return null; // tested on the raw token, as before
            cleanWord = cleanWord.Trim();
            if (0 == cleanWord.Length) return null;
            return cleanWord;
        }

        // Returns the raw tokens from startingIndex onward that are verbs.
        private List<String> FindNextVerbs(String[] words, int startingIndex)
        {
            return FindNextWords(words, startingIndex, LexicalElement.VERB);
        }

        // Returns the raw tokens from startingIndex onward that are nouns.
        private List<String> FindNextNouns(String[] words, int startingIndex)
        {
            return FindNextWords(words, startingIndex, LexicalElement.NOUN);
        }

        // Returns the raw tokens from startingIndex onward that have the given part of speech.
        // A startingIndex at or past the end of the array yields an empty list.
        private List<String> FindNextWords(String[] words, int startingIndex, String partOfSpeech)
        {
            List<String> matches = new List<String>();
            for (int index = startingIndex; index < words.Length; index++)
            {
                if (IsPartOfSpeech(words[index], partOfSpeech)) matches.Add(words[index]);
            }
            return matches;
        }

        // Diagnostic aid: writes one console line per kept word of the headline (last word first)
        // showing the cleaned word, its part of speech ("?" when unknown) and its sentiment.
        // A null headline prints nothing.
        public void PrintHeadlineEntry(String headline)
        {
            lock (syncRoot)
            {
                if (null == headline) return;
                String[] words = headline.Split(WordSeparators);
                for (int index = words.Length - 1; index >= 0; index--)
                {
                    String cleanWord = NormalizeWord(words[index]);
                    if (null == cleanWord) continue;
                    bool isNegative = IsNegativeWord(cleanWord);
                    bool isPositive = IsPositiveWord(cleanWord);
                    String partOfSpeech = null;
                    if (isNegative || isPositive) partOfSpeech = GetPartOfSpeech(cleanWord);
                    // Look up the cleaned word: the raw token still carries case and punctuation.
                    if (null == partOfSpeech) partOfSpeech = GetDictionaryPartOfSpeech(cleanWord);
                    if (null == partOfSpeech) partOfSpeech = "?";
                    Console.WriteLine(String.Format("Word '{0}':{1}:{2}", cleanWord, partOfSpeech, isPositive ? "Positive" : isNegative ? "Negative" : "Neutral"));
                } // for each word in headline
            }
        }

        // Scores every headline of the collection, preserving order.
        public HeadlinesSentiment ProcessHeadlines(Headlines headlines)
        {
            lock (syncRoot)
            {
                HeadlinesSentiment headlineSentimentList = new HeadlinesSentiment();
                foreach (Headline headline in headlines)
                {
                    headlineSentimentList.Add(ProcessHeadline(headline));
                } // for each headline
                return headlineSentimentList;
            }
        }

        // True when the word is in the negative lexicon.
        private bool IsNegativeWord(String word)
        {
            return negativeLexicon.ContainsKey(word);
        }

        // True when the word is in the positive lexicon.
        private bool IsPositiveWord(String word)
        {
            return positiveLexicon.ContainsKey(word);
        }

        // True when either lexicon tags the word with the given part of speech (compared
        // case-insensitively) or the word dictionary lists the word under that part of speech.
        private bool IsPartOfSpeech(String word, String partOfSpeech)
        {
            if (null == word || null == partOfSpeech) return false;
            if (positiveLexicon.ContainsKey(word) && partOfSpeech.Equals(positiveLexicon[word].PartOfSpeech, StringComparison.OrdinalIgnoreCase)) return true;
            if (negativeLexicon.ContainsKey(word) && partOfSpeech.Equals(negativeLexicon[word].PartOfSpeech, StringComparison.OrdinalIgnoreCase)) return true;
            return wordDictionary.ContainsKeyAs(word, partOfSpeech);
        }

        // Returns the part of speech recorded in the lexicons for the word (positive lexicon
        // first), or null when the word is in neither.
        private String GetPartOfSpeech(String word)
        {
            if (positiveLexicon.ContainsKey(word)) return positiveLexicon[word].PartOfSpeech;
            if (negativeLexicon.ContainsKey(word)) return negativeLexicon[word].PartOfSpeech;
            return null;
        }

        // Returns the first part of speech under which the word dictionary lists the word,
        // tried in the order adjective, adverb, noun, verb, conjunction; null when none match.
        private String GetDictionaryPartOfSpeech(String word)
        {
            if (!wordDictionary.ContainsKey(word)) return null;
            String[] partsOfSpeech =
            {
                LexicalElement.ADJECTIVE,
                LexicalElement.ADVERB,
                LexicalElement.NOUN,
                LexicalElement.VERB,
                LexicalElement.CONJUNCTION
            };
            foreach (String partOfSpeech in partsOfSpeech)
            {
                if (wordDictionary.ContainsKeyAs(word, partOfSpeech)) return partOfSpeech;
            }
            return null;
        }

        // False when the word contains any character sequence that disqualifies it.
        private bool Keep(String word)
        {
            foreach (String match in RejectIfContains)
            {
                if (word.Contains(match)) return false;
            }
            return true;
        }

        // Drops every character whose code is below 32 or above 127.
        private string CleanASCII(string s)
        {
            StringBuilder sb = new StringBuilder(s.Length);
            foreach (char c in s)
            {
                if ((int)c > 127) continue;
                if ((int)c < 32) continue;
                sb.Append(c);
            }
            return sb.ToString();
        }

        // Deletes quotes, question marks, tildes, pipes, colons and commas anywhere in the word.
        private String Clean(String word)
        {
            foreach (String toReplace in RemoveEverywhere)
            {
                word = word.Replace(toReplace, String.Empty);
            }
            return word;
        }

        // Strips one leading and one trailing occurrence of each listed punctuation mark.
        // The trailing strip removes exactly one character; it used to remove two, which also
        // cut off the last letter of the word ("gains!" became "gain").
        private String CleanStartsWithEndsWith(String word)
        {
            foreach (String toReplace in StripFromEnds)
            {
                if (word.StartsWith(toReplace, StringComparison.Ordinal)) word = word.Substring(1);
                if (word.EndsWith(toReplace, StringComparison.Ordinal)) word = word.Substring(0, word.Length - 1);
            }
            return word;
        }

        // False when the word starts or ends with a period, a tilde or a digit.
        private bool KeepStartsWithEndsWith(String word)
        {
            foreach (String marker in RejectIfAtEnds)
            {
                if (word.StartsWith(marker, StringComparison.Ordinal)) return false;
                if (word.EndsWith(marker, StringComparison.Ordinal)) return false;
            }
            return true;
        }

        // True when the text parses as a double (current-culture parsing, as before).
        private bool IsNumber(String word)
        {
            double result;
            return double.TryParse(word, out result);
        }
    }
}