Daily and code cleanup for headlines and ExtractLongBusinessSummary
Some checks failed
Build .NET Project / build (push) Has been cancelled
Some checks failed
Build .NET Project / build (push) Has been cancelled
This commit is contained in:
@@ -10,6 +10,7 @@ namespace MarketData.DataAccess
|
||||
// Private, empty constructor: the class cannot be instantiated from outside.
// Every data-access member visible in this file is static.
private HeadlinesDA()
|
||||
{
|
||||
}
|
||||
|
||||
public static DateTime GetMaxHeadlineDate()
|
||||
{
|
||||
MySqlConnection sqlConnection = null;
|
||||
@@ -45,6 +46,7 @@ namespace MarketData.DataAccess
|
||||
if (null != sqlConnection) sqlConnection.Close();
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> GetHeadlineDates()
|
||||
{
|
||||
MySqlConnection sqlConnection = null;
|
||||
@@ -80,6 +82,7 @@ namespace MarketData.DataAccess
|
||||
if (null != sqlConnection) sqlConnection.Close();
|
||||
}
|
||||
}
|
||||
|
||||
public static Headlines GetHeadlines(String symbol)
|
||||
{
|
||||
MySqlConnection sqlConnection = null;
|
||||
@@ -123,6 +126,7 @@ namespace MarketData.DataAccess
|
||||
if (null != sqlConnection) sqlConnection.Close();
|
||||
}
|
||||
}
|
||||
|
||||
public static Headlines GetHeadlines()
|
||||
{
|
||||
MySqlConnection sqlConnection = null;
|
||||
@@ -166,6 +170,7 @@ namespace MarketData.DataAccess
|
||||
if (null != sqlConnection) sqlConnection.Close();
|
||||
}
|
||||
}
|
||||
|
||||
// This was authored for the mobile app, which needs the sort order to match the WPF app.
|
||||
public static Headlines GetLatestHeadlines()
|
||||
{
|
||||
@@ -308,7 +313,6 @@ namespace MarketData.DataAccess
|
||||
/// <returns></returns>
|
||||
public static bool InsertHeadlines(Headlines headlines)
|
||||
{
|
||||
const int batchSize = 1000;
|
||||
MySqlConnection sqlConnection=null;
|
||||
MySqlTransaction sqlTransaction=null;
|
||||
|
||||
@@ -318,9 +322,9 @@ namespace MarketData.DataAccess
|
||||
sqlConnection = SqlUtils.CreateMySqlConnection(MainDataSource.Instance.LocateDataSource("market_data"));
|
||||
sqlTransaction = sqlConnection.BeginTransaction(System.Data.IsolationLevel.ReadCommitted);
|
||||
headlines=new Headlines(headlines.Distinct(new HeadlinesEqualityComparer()).ToList());
|
||||
foreach (IEnumerable<Headline> batch in headlines.Chunk(batchSize))
|
||||
foreach (Headline headline in headlines)
|
||||
{
|
||||
SafeInsertBatch(batch.ToList(), sqlConnection, sqlTransaction);
|
||||
if(!HeadlineExists(headline,sqlConnection,sqlTransaction))InsertHeadline(headline,sqlConnection,sqlTransaction);
|
||||
}
|
||||
sqlTransaction.Commit();
|
||||
return true;
|
||||
@@ -338,76 +342,6 @@ namespace MarketData.DataAccess
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// SafeInsertBatch - Tries to insert the whole batch with one statement. If the insert throws, the batch
/// is split in half and each half is retried recursively until the offending row is isolated. That row is
/// logged and skipped; the halves that insert cleanly are kept.
/// </summary>
/// <param name="batch">The headlines to insert. A null or empty batch is a no-op.</param>
/// <param name="sqlConnection">The connection the insert is executed on.</param>
/// <param name="sqlTransaction">The transaction the insert participates in.</param>
private static void SafeInsertBatch(List<Headline> batch, MySqlConnection sqlConnection, MySqlTransaction sqlTransaction)
{
    // Nothing to do. Without this guard a failing empty batch is never the single-row case,
    // so it would keep splitting into two empty halves and recurse without end.
    if (null == batch || 0 == batch.Count) return;

    try
    {
        InsertBatch(batch, sqlConnection, sqlTransaction);
    }
    catch (Exception ex)
    {
        // A single failing row is the offender: log it and move on (deliberate best-effort).
        if (1 == batch.Count)
        {
            MDTrace.WriteLine(LogLevel.DEBUG, ex);
            MDTrace.WriteLine(LogLevel.DEBUG, $"Bad row: Symbol={batch[0]?.Symbol}, Headline={batch[0]?.Entry}");
            return;
        }

        // More than one row: bisect and retry each half independently.
        int mid = batch.Count / 2;
        SafeInsertBatch(batch.GetRange(0, mid), sqlConnection, sqlTransaction);
        SafeInsertBatch(batch.GetRange(mid, batch.Count - mid), sqlConnection, sqlTransaction);
    }
}
|
||||
|
||||
/// <summary>
/// InsertBatch - Inserts a batch of headlines with a single multi-row, parameterized INSERT IGNORE statement.
/// Each row gets its own indexed parameter set (@symbol0, @asof0, ... @symbol1, ...).
/// </summary>
/// <param name="batch">The headlines to insert. A null or empty batch executes nothing.</param>
/// <param name="sqlConnection">The connection the command is executed on.</param>
/// <param name="sqlTransaction">The transaction the command participates in.</param>
private static void InsertBatch(IEnumerable<Headline> batch, MySqlConnection sqlConnection, MySqlTransaction sqlTransaction)
{
    if (null == batch) return;

    StringBuilder sb = new StringBuilder();
    using MySqlCommand sqlCommand = new MySqlCommand();
    sqlCommand.Connection = sqlConnection;
    sqlCommand.Transaction = sqlTransaction;
    // Same timeout the other commands in this class use.
    sqlCommand.CommandTimeout = SqlUtils.COMMAND_TIMEOUT;

    // One timestamp for the whole batch so rows without a Modified value agree with each other.
    DateTime now = DateTime.Now;

    sb.Append("INSERT IGNORE INTO Headlines (symbol, asof, headline, source, modified) VALUES ");
    int index = 0;
    foreach (Headline headline in batch)
    {
        if (index > 0) sb.Append(",");
        sb.Append($"(@symbol{index}, @asof{index}, @headline{index}, @source{index}, @modified{index})");
        sqlCommand.Parameters.AddWithValue($"@symbol{index}", headline.Symbol);
        sqlCommand.Parameters.AddWithValue($"@asof{index}", headline.Date);
        sqlCommand.Parameters.AddWithValue($"@headline{index}", headline.Entry);
        sqlCommand.Parameters.AddWithValue($"@source{index}", headline.Source);
        // An epoch Modified value is replaced with the batch timestamp.
        DateTime modified = Utility.IsEpoch(headline.Modified) ? now : headline.Modified;
        sqlCommand.Parameters.AddWithValue($"@modified{index}", modified);
        index++;
    }

    // No rows were appended: "... VALUES " with no tuples is malformed SQL, so skip execution.
    if (0 == index) return;

    sqlCommand.CommandText = sb.ToString();
    sqlCommand.ExecuteNonQuery();
}
|
||||
|
||||
/// <summary>
|
||||
/// InsertHeadline - This is now parameterized. The MySql driver should handle all escaping, etc.
|
||||
/// </summary>
|
||||
/// <param name="headline"></param>
|
||||
/// <param name="sqlConnection"></param>
|
||||
/// <param name="sqlTransaction"></param>
|
||||
/// <returns></returns>
|
||||
private static bool InsertHeadline(Headline headline,MySqlConnection sqlConnection,MySqlTransaction sqlTransaction)
|
||||
{
|
||||
MySqlCommand sqlCommand=null;
|
||||
@@ -416,14 +350,17 @@ namespace MarketData.DataAccess
|
||||
try
|
||||
{
|
||||
if (null == headline || null == headline.Symbol || null==headline.Entry) return false;
|
||||
strQuery = @"INSERT INTO Headlines (symbol, asof, headline, source, modified) VALUES (@symbol, @asof, @headline, @source, @modified)";
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.Append("insert into Headlines(symbol,asof,headline,source,modified) values(");
|
||||
sb.Append(SqlUtils.AddQuotes(headline.Symbol)).Append(",");
|
||||
sb.Append(SqlUtils.AddQuotes(SqlUtils.SqlDate(headline.Date))).Append(",");
|
||||
sb.Append(SqlUtils.AddQuotes(SqlUtils.SqlString(headline.Entry))).Append(",");
|
||||
sb.Append(SqlUtils.AddQuotes(SqlUtils.SqlString(headline.Source))).Append(",");
|
||||
if(Utility.IsEpoch(headline.Modified))sb.Append(SqlUtils.AddQuotes(SqlUtils.ToSqlDateTime(DateTime.Now)));
|
||||
else sb.Append(SqlUtils.AddQuotes(SqlUtils.ToSqlDateTime(headline.Modified)));
|
||||
sb.Append(")");
|
||||
strQuery = sb.ToString();
|
||||
sqlCommand = new MySqlCommand(strQuery, sqlConnection, sqlTransaction);
|
||||
sqlCommand.Parameters.AddWithValue("@symbol", headline.Symbol);
|
||||
sqlCommand.Parameters.AddWithValue("@asof", headline.Date);
|
||||
sqlCommand.Parameters.AddWithValue("@headline", headline.Entry);
|
||||
sqlCommand.Parameters.AddWithValue("@source", headline.Source);
|
||||
DateTime modified = Utility.IsEpoch(headline.Modified) ? DateTime.Now : headline.Modified;
|
||||
sqlCommand.Parameters.AddWithValue("@modified", modified);
|
||||
sqlCommand.CommandTimeout = SqlUtils.COMMAND_TIMEOUT;
|
||||
sqlCommand.ExecuteNonQuery();
|
||||
return true;
|
||||
@@ -431,7 +368,7 @@ namespace MarketData.DataAccess
|
||||
catch (Exception exception)
|
||||
{
|
||||
MDTrace.WriteLine(LogLevel.DEBUG,exception);
|
||||
SqlUtils.LogCommandParameters(strQuery, sqlCommand);
|
||||
MDTrace.WriteLine(LogLevel.DEBUG,String.Format("Query was '{0}'",strQuery));
|
||||
return false;
|
||||
}
|
||||
finally
|
||||
@@ -440,31 +377,36 @@ namespace MarketData.DataAccess
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// HeadlineExists - Checks whether a headline with the same date and text is already stored.
/// The lookup uses parameterized arguments, so the driver handles escaping of the headline text.
/// </summary>
/// <param name="headline">The headline to look for.</param>
/// <param name="sqlConnection">The connection the query is executed on.</param>
/// <param name="sqlTransaction">The transaction the query participates in.</param>
/// <returns>
/// True when at least one matching row exists. False when no row matches, when the headline or its
/// symbol is null, or when the query throws (the exception is logged).
/// </returns>
private static bool HeadlineExists(Headline headline,MySqlConnection sqlConnection,MySqlTransaction sqlTransaction)
{
    try
    {
        if (null == headline || null == headline.Symbol) return false;

        // The match is on asof and headline only; symbol is not part of the lookup.
        // NOTE(review): asof is bound as the raw DateTime - confirm the column type compares as intended.
        const string strQuery = @"SELECT count(*) FROM headlines WHERE asof = @asof AND headline = @headline";

        using MySqlCommand sqlCommand = new MySqlCommand(strQuery, sqlConnection, sqlTransaction);
        sqlCommand.Parameters.AddWithValue("@asof", headline.Date);
        sqlCommand.Parameters.AddWithValue("@headline", headline.Entry);
        sqlCommand.CommandTimeout = SqlUtils.COMMAND_TIMEOUT;

        // count(*) returns a single scalar; no reader is needed.
        int result = Convert.ToInt32(sqlCommand.ExecuteScalar());
        return 0 != result;
    }
    catch (Exception exception)
    {
        MDTrace.WriteLine(LogLevel.DEBUG,exception);
        return false;
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1601,39 +1601,46 @@ namespace MarketData.Helper
|
||||
/// <returns></returns>
|
||||
public static string ExtractLongBusinessSummary(string html)
{
    // Flow: find the <script> element whose content mentions "modules=assetProfile", parse its text as
    // JSON, parse the "body" value as a second JSON document, then read
    // quoteSummary.result[0].assetProfile.longBusinessSummary.
    // Returns null when any step cannot be completed.
    if (string.IsNullOrEmpty(html)) return null;

    try
    {
        // Locate the script containing the assetProfile API response.
        int start = html.IndexOf("modules=assetProfile", StringComparison.Ordinal);
        if (start < 0) return null;

        // Move back to the start of the enclosing script tag.
        start = html.LastIndexOf("<script", start, StringComparison.Ordinal);
        if (start < 0) return null;

        // The JSON payload begins right after the opening tag's '>'.
        int jsonStart = html.IndexOf('>', start);
        if (jsonStart < 0) return null;
        jsonStart++;

        int jsonEnd = html.IndexOf("</script>", jsonStart, StringComparison.Ordinal);
        if (jsonEnd < 0) return null;

        string outerJson = html.Substring(jsonStart, jsonEnd - jsonStart);

        // Parse the outer JSON; its "body" value is itself JSON text.
        JObject outer = JObject.Parse(outerJson);
        string bodyJson = outer["body"]?.ToString();
        if (bodyJson == null) return null;

        // Parse the inner JSON and make sure result is an array with at least one element.
        JObject inner = JObject.Parse(bodyJson);
        JToken resultToken = inner["quoteSummary"]?["result"];
        if (resultToken is not JArray resultArray || resultArray.Count == 0)
        {
            return null;
        }

        return resultArray[0]?["assetProfile"]?["longBusinessSummary"]?.ToString();
    }
    catch (Exception)
    {
        // Deliberate best-effort: malformed markup or JSON yields "no summary" rather than an exception.
        return null;
    }
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user