Files
marketdata/MarketDataLib/CNNProcessing/CNNProcessor.cs

329 lines
21 KiB
C#

using System;
using System.IO;
using System.Collections.Generic;
using MarketData.Utils;
using System.Text;
using System.Globalization;
namespace MarketData.CNNProcessing
{
public class CNNProcessor
{
// Defaults shared by the static helpers in this class.
private static int dayCount=270; // This is the default number of history days passed to TestCase
private static int width=128; // This is the default width assigned to DataProcessor.Width
private static int height=128; // This is the default height assigned to DataProcessor.Height
// Private constructor: every member of this class is static, so instances are never created.
private CNNProcessor()
{
}
/// <summary>
/// GenerateTraining - This is the current version. Please refer to the CNNImageProcessor project for information on how to call this method.
/// Training cases built from the avoid holdings are processed into rootFolder\0 and those built from the good holdings into rootFolder\1.
/// ClearFolderPath is invoked on each output folder before its cases are processed.
/// </summary>
/// <param name="avoid">The collection of avoid holdings (class 0)</param>
/// <param name="good">The collection of good holdings (class 1)</param>
/// <param name="dimension">The image dimension, used for both width and height. For example 224 for 224x224 or 128 for 128x128</param>
/// <param name="histDays">The number of history days handed to each TestCase. For example 90 was used for convnext</param>
/// <param name="generateType">The generate type handed to each TestCase. For example BollingerBandWithVIX (a bollinger band with ^VIX overlay) was used for convnext</param>
/// <param name="rootFolder">The root output folder. A trailing backslash is appended when it is missing</param>
public static void GenerateTraining(List<Holding> avoid, List<Holding> good, int dimension, int histDays,TestCase.GenerateType generateType=TestCase.GenerateType.BollingerBandWithVIX,String rootFolder=@"C:\boneyard\DeepLearning\ModelInputData\")
{
    TestCases trainingCases=new TestCases();
    DataProcessor imageProcessor=new DataProcessor
    {
        Width=dimension,
        Height=dimension,
        PenWidthArray=new float[]{.25f,.50f,.75f,1.00f,1.50f,2.00f}
    };
    String outputRoot=rootFolder.EndsWith(@"\")?rootFolder:rootFolder+@"\";
    // Index 0 is the avoid data, index 1 is the good data; the folder name doubles as the class label.
    String[] classFolders={"0","1"};
    List<Holding>[] classHoldings={avoid,good};
    for(int classIndex=0;classIndex<classFolders.Length;classIndex++)
    {
        // Start every class after the first one with an empty case list.
        if(classIndex>0)trainingCases.Clear();
        foreach(Holding classHolding in classHoldings[classIndex])
        {
            trainingCases.Add(new TestCase(classHolding.Symbol,classHolding.PurchaseDate,histDays,TestCase.CaseType.Training,generateType));
        }
        imageProcessor.SetOutputFolderPath(outputRoot+classFolders[classIndex]);
        imageProcessor.ClearFolderPath();
        imageProcessor.ProcessData(trainingCases);
    }
}
/// <summary>
/// GenerateTraining - This is the old method of training the resnet model. Please see the overload that takes the holdings as parameters.
/// The hard-coded avoid cases are processed into rootFolder\0 and the hard-coded good cases into rootFolder\1.
/// Every case uses 270 history days and the BollingerBand generate type.
/// </summary>
/// <param name="rootFolder">The root output folder. A trailing backslash is appended when it is missing</param>
public static void GenerateTraining(String rootFolder=@"C:\boneyard\DeepLearning\ModelInputData\")
{
    const int histDays=270;
    TestCases testCases=new TestCases();
    DataProcessor dataProcessor=new DataProcessor();
    dataProcessor.Width=width;
    dataProcessor.Height=height;
    // dataProcessor.PenWidthArray=new float[]{.50f,.75f,1.00f,1.12f,1.25f,1.31f,1.37f,1.50f,1.56f,1.62f,1.75f,1.87f,2.00f};
    // Testing with 20,000 images in each set so reducing the number of pens. The full pen list was producing 260,000 images for each classification,
    // taking many hours to build the datasets
    dataProcessor.PenWidthArray=new float[]{.75f,1.00f,1.12f};
    if(!rootFolder.EndsWith(@"\"))rootFolder+=@"\";
    // [0] Data - The avoid data {symbol, purchase date as MM/dd/yyyy}
    string[,] avoidCases=new string[,]
    {
        {"CENX","03/31/2022"},{"ICPT","12/31/2019"},{"XMTR","01/31/2024"},{"CDXS","11/30/2021"},{"EXTR","08/31/2023"},
        {"DISCA","02/26/2021"},{"DOCN","10/29/2021"},{"CCL","07/31/2023"},{"NET","11/30/2021"},{"F","01/31/2022"},
        {"RCKT","01/31/2020"},{"ADT","11/29/2019"},{"MOS","03/31/2022"},{"FSM","05/31/2024"},{"ITOS","01/31/2022"},
        {"ENPH","01/29/2021"},{"GOSS","10/29/2021"},{"KURA","11/30/2020"},{"WIRE","12/31/2021"},{"CUBI","12/31/2021"},
        {"SWAV","05/31/2023"},{"SPSC","10/29/2021"},{"EPC","01/31/2022"},{"FTCH","12/31/2020"},{"VC","10/31/2019"},
        {"VRT","04/30/2024"},{"LPG","11/30/2023"},{"LI","08/31/2023"},{"INBX","01/31/2024"},{"WYNN","02/28/2023"}
    };
    AddBollingerBandTrainingCases(testCases,avoidCases,histDays);
    dataProcessor.SetOutputFolderPath(rootFolder+"0");
    dataProcessor.ProcessData(testCases);
    testCases.Clear();
    // [1] Data - The good data {symbol, purchase date as MM/dd/yyyy}
    string[,] goodCases=new string[,]
    {
        {"CPG","03/31/2021"},{"VCEL","02/26/2021"},{"CLF","01/29/2021"},{"XPO","08/31/2023"},{"RH","10/31/2019"},
        {"NEM","05/30/2020"},{"EXPI","08/31/2020"},{"INTR","07/31/2023"},{"TSLA","08/31/2020"},{"WOW","02/26/2021"},
        {"CLDR","10/31/2019"},{"SPOT","03/31/2023"},{"ACLS","02/28/2023"},{"COCO","04/28/2023"},{"DAC","04/30/2021"},
        {"CDE","06/28/2024"},{"UEC","09/29/2023"},{"ERJ","05/28/2021"},{"SITM","08/31/2020"},{"CAVA","06/28/2024"},
        {"APGE","02/29/2024"},{"CPRI","01/29/2021"},{"VST","06/28/2024"},{"PDD","05/30/2020"},{"NVCR","10/30/2020"},
        {"VITL","04/30/2024"},{"SIG","03/31/2021"},{"YPF","11/30/2022"},{"BLBD","03/28/2024"},{"BAND","06/30/2020"},
        {"COOP","09/30/2020"},{"SPT","07/30/2021"},{"NTLA","12/31/2020"},{"PTON","09/30/2020"},{"VRT","07/31/2023"},
        {"NET","08/31/2021"},{"NVDA","04/28/2023"},{"DOCU","05/30/2020"},{"SIG","10/30/2020"}
    };
    AddBollingerBandTrainingCases(testCases,goodCases,histDays);
    dataProcessor.SetOutputFolderPath(rootFolder+"1");
    dataProcessor.ProcessData(testCases);
}
/// <summary>
/// Appends one BollingerBand training TestCase to testCases for every {symbol, MM/dd/yyyy date} row, in row order.
/// The dates are parsed with the invariant culture so the result does not depend on the machine's regional settings.
/// </summary>
private static void AddBollingerBandTrainingCases(TestCases testCases,string[,] symbolsAndDates,int histDays)
{
    for(int row=0;row<symbolsAndDates.GetLength(0);row++)
    {
        DateTime purchaseDate=DateTime.ParseExact(symbolsAndDates[row,1],"MM/dd/yyyy",CultureInfo.InvariantCulture);
        testCases.Add(new TestCase(symbolsAndDates[row,0],purchaseDate,histDays,TestCase.CaseType.Training,TestCase.GenerateType.BollingerBand));
    }
}
/// <summary>
/// GenerateGeneralizedData - Builds one Test case per hard-coded {symbol, purchase date} pair using the default
/// day count, width and height, and processes them into C:\boneyard\DeepLearning\IndividualValidationCases.
/// </summary>
public static void GenerateGeneralizedData()
{
    TestCases testCases=new TestCases();
    DataProcessor dataProcessor=new DataProcessor();
    dataProcessor.Width=width;
    dataProcessor.Height=height;
    dataProcessor.PenWidth=1;
    // {symbol, purchase date as MM/dd/yyyy}
    string[,] s=new string[,]{{"CRTX","07/30/2021"},{"SBSW","12/31/2019"},{"CWH","01/31/2020"},{"DISCA","02/26/2021"},{"DNLI","12/31/2020"},{"SNAP","12/29/2023"},{"FSM","05/31/2024"},{"TSLA","11/30/2021"},{"SWAV","05/31/2023"},{"PCTY","09/30/2021"},{"DCBO","08/31/2021"},{"LPG","11/30/2023"},{"KOD","11/30/2020"},{"NUE","05/28/2021"},{"XPO","08/31/2023"},{"EDU","11/30/2023"},{"BAK","04/30/2021"},{"INSW","11/30/2022"},{"ACLS","02/28/2023"},{"VST","03/28/2024"},{"TRMD","11/30/2022"},{"APGE","02/29/2024"},{"ANF","02/29/2024"},{"BAND","06/30/2020"},{"COIN","12/29/2023"},{"MTDR","03/31/2021"}};
    // GetLength(0) is the row count; it replaces the Length/2 arithmetic that only works for two columns.
    for(int index=0;index<s.GetLength(0);index++)
    {
        String symbol=s[index,0];
        // Parse with a fixed format and the invariant culture so the hard-coded US dates work under any regional setting.
        DateTime purchaseDate=DateTime.ParseExact(s[index,1],"MM/dd/yyyy",CultureInfo.InvariantCulture);
        TestCase testCase=new TestCase(symbol,purchaseDate,dayCount,TestCase.CaseType.Test,TestCase.GenerateType.BollingerBand);
        testCases.Add(testCase);
    }
    dataProcessor.SetOutputFolderPath(@"C:\boneyard\DeepLearning\IndividualValidationCases");
    dataProcessor.ProcessData(testCases);
}
/// <summary>
/// Predict - Placeholder. Always returns an empty string.
/// </summary>
/// <returns>An empty string</returns>
public static String Predict()
{
    return String.Empty;
}
/// <summary>
/// TestPredictAPI - Sends one generated image per hard-coded {symbol, purchase date} pair to the model server and
/// writes the parsed responses to a CSV file (also traced at DEBUG level).
/// Returns without doing anything else when the server does not answer the ping.
/// An existing output file is deleted first; a failure to delete is logged to the console and rethrown.
/// </summary>
/// <exception cref="FormatException">A prediction response does not contain the "-->" separator or its numeric parts cannot be parsed</exception>
public static void TestPredictAPI()
{
    const String predictionSeparator="-->";
    String strPathOutputFile = @"C:\boneyard\DeepLearning\TestPredictAPIOutput\outputcases_europa.csv";
    CNNClient cnnClient = new CNNClient("http://10.0.0.73:5000");
    if (!cnnClient.Ping())
    {
        Console.WriteLine("Model server is not reachable.");
        return;
    }
    DataProcessor dataProcessor=new DataProcessor();
    dataProcessor.Width=width;
    dataProcessor.Height=height;
    dataProcessor.PenWidth=1;
    if(File.Exists(strPathOutputFile))
    {
        try{File.Delete(strPathOutputFile);}
        catch(Exception exception)
        {
            Console.WriteLine(String.Format("Exception:{0}",exception.ToString()));
            throw;
        }
    }
    // {symbol, purchase date as MM/dd/yyyy}
    string[,] s=new string[,]{{"CRTX","07/30/2021"},{"SBSW","12/31/2019"},{"CWH","01/31/2020"},{"DISCA","02/26/2021"},{"DNLI","12/31/2020"},{"SNAP","12/29/2023"},{"FSM","05/31/2024"},{"TSLA","11/30/2021"},{"SWAV","05/31/2023"},{"PCTY","09/30/2021"},{"DCBO","08/31/2021"},{"LPG","11/30/2023"},{"KOD","11/30/2020"},{"NUE","05/28/2021"},{"XPO","08/31/2023"},{"EDU","11/30/2023"},{"BAK","04/30/2021"},{"INSW","11/30/2022"},{"ACLS","02/28/2023"},{"VST","03/28/2024"},{"TRMD","11/30/2022"},{"APGE","02/29/2024"},{"ANF","02/29/2024"},{"BAND","06/30/2020"},{"COIN","12/29/2023"},{"MTDR","03/31/2021"}};
    // The using block flushes and closes the file even when a prediction or a parse throws part-way through.
    using(StreamWriter streamWriter=new StreamWriter(new FileStream(strPathOutputFile,FileMode.CreateNew)))
    {
        streamWriter.WriteLine("Symbol,Date,Response,Response,Raw Response");
        MDTrace.WriteLine(LogLevel.DEBUG,"Symbol,Date,Response,Response,Raw Response");
        for(int index=0;index<s.GetLength(0);index++)
        {
            StringBuilder sb=new StringBuilder();
            String symbol=s[index,0];
            // Fixed format + invariant culture: the hard-coded US dates must parse under any regional setting.
            DateTime purchaseDate=DateTime.ParseExact(s[index,1],"MM/dd/yyyy",CultureInfo.InvariantCulture);
            TestCase testCase=new TestCase(symbol,purchaseDate,dayCount,TestCase.CaseType.Test,TestCase.GenerateType.BollingerBand,TestCase.OutputType.OutputStream);
            dataProcessor.ProcessData(testCase);
            // testCase.Responses.Add(cnnClient.Predict(CNNClient.Model.resnet50,testCase.LastStream));
            testCase.Responses.Add(cnnClient.Predict(CNNClient.Model.resnet50_20241024_270,testCase.LastStream));
            String strPredictionResponse=testCase.LastResponse;
            // The response is split at "-->": the value is read from the left part and the class result from the right part.
            int separatorIndex=strPredictionResponse.IndexOf(predictionSeparator,StringComparison.Ordinal);
            if(separatorIndex<0)
            {
                throw new FormatException(String.Format("Prediction response for {0} does not contain '{1}': {2}",symbol,predictionSeparator,strPredictionResponse));
            }
            String strPredictionResult=strPredictionResponse.Substring(separatorIndex+predictionSeparator.Length);
            int result=int.Parse(Utility.BetweenString(strPredictionResult,"[[","]"),CultureInfo.InvariantCulture);
            String strPredictionValue=strPredictionResponse.Substring(0,separatorIndex);
            // The server output is machine-formatted, so it is parsed culture-independently.
            double value=double.Parse(Utility.BetweenString(strPredictionValue,"[[","]"),CultureInfo.InvariantCulture);
            sb.Append(symbol);
            sb.Append(",");
            sb.Append(Utility.AddQuotes(Utility.DateTimeToStringMMSDDSYYYY(purchaseDate)));
            sb.Append(",");
            sb.Append(Utility.AddQuotes(Utility.FormatNumber(value,8,false))).Append(",").Append(Utility.AddQuotes(result.ToString(CultureInfo.InvariantCulture)));
            sb.Append(",").Append(Utility.AddQuotes(strPredictionResponse));
            MDTrace.WriteLine(LogLevel.DEBUG,sb.ToString());
            streamWriter.WriteLine(sb.ToString());
        }
    }
}
/// <summary>
/// divide - Steps down from seedMax towards seedMin in increments of (seedMax-seedMin)/divisions, keeps at most
/// 'divisions' values, and prints them in ascending order on one console line as "value" followed by "f,".
/// The output format matches float literals such as those used for the pen width arrays.
/// </summary>
/// <param name="divisions">The maximum number of values to produce. Expected to be a finite value</param>
/// <param name="seedMin">The lower bound of the range</param>
/// <param name="seedMax">The upper bound of the range; this is the first value generated</param>
public static void divide(double divisions,double seedMin,double seedMax)
{
    double skip=(seedMax-seedMin)/divisions;
    List<double> array=new List<double>();
    for(double seed=seedMax;seed>=seedMin;seed-=skip)
    {
        // '>=' instead of '==': an int count can never equal a negative or fractional 'divisions',
        // which previously left the loop without a stopping condition for such inputs.
        if(array.Count>=divisions)break;
        array.Add(seed);
    }
    // Values were collected from high to low; emit them from low to high.
    for(int index=array.Count-1;index>=0;index--)
    {
        Console.Write(String.Format("{0}f,",Utility.FormatNumber(array[index],6)));
    }
    Console.WriteLine("");
}
}
public class Holding
{
// Ticker symbol; FromString reads it from column 0.
public String Symbol {get;set;}
// Purchase date; FromString reads it from column 2.
public DateTime PurchaseDate {get; set; }
// Purchase price; FromString reads it from column 3.
public double PurchasePrice {get;set;}
// Sell date; FromString reads it from column 4.
public DateTime SellDate {get; set; }
// Sell price; FromString reads it from column 5.
public double SellPrice {get;set;}
// Gain/loss amount; FromString reads it from column 16.
public double GainLoss{ get; set;}
// Gain/loss as a fraction; FromString strips a trailing '%' from column 17 and divides by 100.
public double GainLossPercent {get;set;}
// Date layouts accepted by FromString for the purchase and sell date columns.
private static readonly string[] DateFormats = { "MM/dd/yyyy", "M/dd/yyyy", "M/d/yyyy" };
// Culture used by FromString when parsing dates and numbers.
private static readonly CultureInfo UsCulture = CultureInfo.GetCultureInfo("en-US");
/// <summary>
/// Heading - The comma separated column header line. The column positions match the indexes read by FromString.
/// </summary>
public static String Heading
{
    get
    {
        const String headingColumns="Symbol,Shares,Purchase Date,Purchase Price,Sell Date,Sell Price,Exposure,Beta,BetaMonths,SharpeRatio,RiskAdjustedWeight,RiskAdjustedAllocation,TargetBetaOverBeta,Score,CNN Prediction,Market Value,Gain Loss,Gain Loss (%)";
        return headingColumns;
    }
}
/// <summary>
/// ToTestCase - Renders this holding as a line of C# source that adds a 270 day BollingerBand training TestCase
/// for the holding's symbol and purchase date.
/// </summary>
/// <returns>The generated source line</returns>
public String ToTestCase()
{
    String purchaseDateText=Utility.DateTimeToStringMMSDDSYYYY(PurchaseDate);
    String symbolArgument="\""+Symbol+"\"";
    String dateArgument="DateTime.Parse("+"\""+purchaseDateText+"\")";
    return "testCases.Add(new TestCase("+symbolArgument+","+dateArgument+","+"270,TestCase.CaseType.Training,TestCase.GenerateType.BollingerBand));";
}
/// <summary>
/// ToString - Renders this holding as one comma separated line laid out like Heading. Columns that this class
/// does not carry (shares, exposure, beta, ... market value) are left empty.
/// Dates are written in the en-US short date form so that FromString can read the line back under any regional setting.
/// </summary>
/// <returns>The comma separated line</returns>
public override String ToString()
{
    StringBuilder sb = new StringBuilder();
    sb.Append(Symbol).Append(",");
    sb.Append(","); // shares
    // "d" with UsCulture yields M/d/yyyy, one of the formats FromString accepts; ToShortDateString followed the machine's culture.
    sb.Append(PurchaseDate.ToString("d",UsCulture)).Append(",");
    sb.Append(Utility.FormatNumber(PurchasePrice,3)).Append(",");
    sb.Append(SellDate.ToString("d",UsCulture)).Append(",");
    sb.Append(Utility.FormatNumber(SellPrice,3)).Append(",");
    // Ten empty columns: exposure, beta, beta months, sharpe ratio, risk adjusted weight,
    // RiskAdjustedAllocation, TargetBetaOverBeta, Score, CNNPrediction, Market Value
    sb.Append(',',10);
    sb.Append(Utility.FormatNumber(GainLoss,3)).Append(",");
    // NOTE(review): FromString divides the last column by 100 while the stored value is written here unscaled - confirm this is intended.
    sb.Append(Utility.FormatNumber(GainLossPercent,3));
    return sb.ToString();
}
/// <summary>
/// FromString - Parses one comma separated line laid out like Heading into a Holding.
/// Reads the symbol (column 0), purchase date/price (columns 2 and 3), sell date/price (columns 4 and 5),
/// gain loss (column 16) and gain loss percent (column 17, optional trailing '%', stored divided by 100).
/// Dates and numbers are parsed with the en-US culture.
/// </summary>
/// <param name="strLine">The line to parse</param>
/// <returns>The parsed holding, or null when the line is null, empty, or has an empty symbol column</returns>
/// <exception cref="FormatException">The line has fewer than 18 columns, or a date or number column cannot be parsed</exception>
public static Holding FromString(string strLine)
{
    const int requiredColumns=18;
    // A null line is treated like an empty one instead of failing with a NullReferenceException.
    if(string.IsNullOrEmpty(strLine))return null;
    string[] items = strLine.Split(',');
    if(string.IsNullOrEmpty(items[0]))return null;
    // Report short lines as malformed input rather than letting an index run off the end of the array.
    if(items.Length<requiredColumns)
    {
        throw new FormatException(String.Format("Expected at least {0} columns but found {1}: {2}",requiredColumns,items.Length,strLine));
    }
    Holding holding = new Holding();
    holding.Symbol = items[0];
    holding.PurchaseDate = DateTime.ParseExact(items[2], DateFormats, UsCulture, DateTimeStyles.AssumeLocal);
    holding.PurchasePrice = double.Parse(items[3], UsCulture);
    holding.SellDate = DateTime.ParseExact(items[4], DateFormats, UsCulture, DateTimeStyles.AssumeLocal);
    holding.SellPrice = double.Parse(items[5], UsCulture);
    holding.GainLoss = double.Parse(items[16], UsCulture);
    holding.GainLossPercent = double.Parse(items[17].TrimEnd('%'), UsCulture) / 100.0;
    return holding;
}
}
}