From e92a8e230551fb9a2b3c70c6f614ecaddb22d499 Mon Sep 17 00:00:00 2001 From: Sean Date: Wed, 1 Oct 2025 23:13:03 -0400 Subject: [PATCH] Commit Latest --- .../MarketDataLib/Helper/MarketDataHelper.cs | 82 ++++++++++++++- .../Integration/HttpNetRequest.cs | 99 +++++++++++-------- 2 files changed, 138 insertions(+), 43 deletions(-) diff --git a/MarketData/MarketDataLib/Helper/MarketDataHelper.cs b/MarketData/MarketDataLib/Helper/MarketDataHelper.cs index cac0b56..96924cd 100755 --- a/MarketData/MarketDataLib/Helper/MarketDataHelper.cs +++ b/MarketData/MarketDataLib/Helper/MarketDataHelper.cs @@ -5343,11 +5343,11 @@ namespace MarketData.Helper // sb.Append(asOf.Day.ToString()); // sb.Append("%2F"); // sb.Append((asOf.Year - 2000).ToString()); - // sb.Append("&x=38&y=25");0000000000000000000000000000000000000000 + // sb.Append("&x=38&y=25"); // strRequest = sb.ToString(); - + String requestUri = "https://www.marketwatch.com/investing/stock"; //https://www.marketwatch.com/investing/stock/TSCDY/download-data sb.Append("https://www.marketwatch.com/investing/stock/").Append(symbol); @@ -5360,7 +5360,11 @@ namespace MarketData.Helper strRequest = sb.ToString(); - httpNetResponse = HttpNetRequest.GetRequestNoEncodingV2(strRequest); + strRequest = "https://www.marketwatch.com/investing/stock/TSCDY/download-data"; + + CookieCollection cookieCollection = GetCookieCollection("www.marketwatch.com"); + + httpNetResponse = HttpNetRequest.GetRequestNoEncodingV2(strRequest,cookieCollection,new Uri("https://www.marketwatch.com/investing/stock")); if (!httpNetResponse.Success) { MDTrace.WriteLine(LogLevel.DEBUG, String.Format("Request:{0} failed with status {1}", httpNetResponse.Request, httpNetResponse.StatusCode)); @@ -5400,6 +5404,78 @@ namespace MarketData.Helper if (null != httpNetResponse) httpNetResponse.Dispose(); } } + + private static CookieCollection GetCookieCollection(String cookieDomain) + { + String[] cookies = { + "refresh=off;", + "letsGetMikey=enabled;", + "datadome=fzsk~7E54ba21Kkt75NL7RdtwUZrtrG64Uvv9RuT_QixsRLt1Pg90R~N1RaukSzqDP0YqHWcvX166YjBBnpdSaX7vh~Wafia2pGVi77ERaKSz0~ZyRLxznkzn38oztAD;", + "mw_loc=%7B%22Region%22%3A%22FL%22%2C%22Country%22%3A%22US%22%2C%22Continent%22%3A%22NA%22%2C%22ApplicablePrivacy%22%3A0%7D;", + "gdprApplies=false;", + "ab_uuid=e7c9a8ca-7cd9-48f8-818f-7ccaa17ffc4b;", + "fullcss-quote=quote-1e61c76db6.min.css;", + "icons-loaded=true;", + "recentqsmkii=AmericanDepositoryReceiptStock-US-TSCDY|Stock-US-AAPL|Index-US-DJIA;", + "connect.sid=s%3A3XNkudEKQqYfNnQGrDguqwsGtYWTbtWy.3MJrxWzt5ZdjlLen9TChXjvHqpeUW%2FcORbyHcuXO9LQ;", + "_pubcid=92731ab0-a67f-4ca8-8a43-69a724febae8;", + "_pubcid_cst=DCwOLBEsaQ%3D%3D;", + "_lr_env_src_ats=false;", + "_sp_su=true;", + "usnatUUID=99ec3682-498e-4f9e-b269-c046af6d9640;", + "_ncg_domain_id_=ddcf2e8a-9f26-4e50-a630-6f20878eab2a.1.1759284504.1790820504;", + "utag_main=v_id:01999d87268e004df9958911c73005050003f00d00b10$_sn:2$_ss:0$_st:1759349260753$vapi_domain:marketwatch.com$ses_id:1759346895885%3Bexp-session$_pn:10%3Bexp-session$_prevpage:MW_Company%20Download%20Data%3Bexp-1759351060755;", + "AMCV_CB68E4BA55144CAA0A4C98A5%40AdobeOrg=1585540135%7CMCIDTS%7C20363%7CMCMID%7C73618612147539605398398216823576272907%7CMCAID%7CNONE%7CMCOPTOUT-1759354661s%7CNONE%7CvVersion%7C4.4.0;", + "consentUUID=db30bab1-8191-4fdd-9a50-4d1ad172586a;", + "ajs_anonymous_id=9222ad8b-156b-4f06-a002-81c83466cad5;", + "_fbp=fb.1.1759284504405.1136234062;", + "_meta_facebookTag_sync=1759284504405;", + "_meta_googleAdsSegments_library_loaded=1759284504406;", + "AMCVS_CB68E4BA55144CAA0A4C98A5%40AdobeOrg=1;", + "s_ppv=MW_Company%2520Download%2520Data%2C7%2C7%2C150;", + "s_tp=2100;", + "s_cc=true;", + "_dj_id.cff7=.1759284504.2.1759347461.1759284504.bb5714a9-80f8-4f28-9055-84dd571c8508.17332b11-d3d6-4cf3-b12a-0372b78f56c6.97da5143-591d-4be2-a4b7-cf7fd004273e.1759346896124.10;", + "_gcl_au=1.1.870637042.1759284505;", + "_meta_cross_domain_id=8166c525-59e1-4deb-8730-5b7346db4b2d;", + "_meta_cross_domain_recheck=1759284504614;", + "_ga_K2H7B9JRSS=GS2.1.s1759346896$o2$g1$t1759347460$j19$l0$h1325198532;", + "_ga=GA1.1.391315045.1759284505;", + "_dj_sp_id=02b318f5-3f5d-4b6b-9b50-e046bb67bfde;", + "_pctx=%7Bu%7DN4IgrgzgpgThIC4B2YA2qA05owMoBcBDfSREQpAeyRCwgEt8oBJAEzIE4AmHgZi4CsvAIwB2DqIAMADkHTRvEAF8gA;", + "_pcid=%7B%22browserId%22%3A%22mg7ckjvkk2y3p3w6%22%7D;", + "__pat=-14400000;", + "__pvi=eyJpZCI6InYtbWc4ZHB0bjB2a21oOTlraCIsImRvbWFpbiI6Ii5tYXJrZXR3YXRjaC5jb20iLCJ0aW1lIjoxNzU5MzQ3NDYxMjgyfQ%3D%3D;", + "xbc=%7Bkpcd%7DChBtZzdja2p2a2syeTNwM3c2Ego2eWtRMjdJRXB1GjxsRGZ0c1NJZXk5SXB5eEE5U0VyTVl2b2oxVmhIS0tuT3ZMdzV5anJtMWgxZWNwNUhNR3Y3VHdJbTdCaDggAA;", + "_pcus=eyJ1c2VyU2VnbWVudHMiOnsiQ09NUE9TRVIxWCI6eyJzZWdtZW50cyI6WyJMVHM6ODhjNWM5YTg1YjJmNWU4MGViYTZiOWViOWVjYjVmOTM3NDljNzM2OTpub19zY29yZSIsIkxUcmV0dXJuOmY3N2FkMmI5NDZmZTBlOWI1NDRlOWZlMzZiYTk1NGViNmI0ZGNlYWE6bm9fc2NvcmUiLCJDU2NvcmU6YzdiZDdjNTlhZTQ5ZmEwODI0NTc1MjAwOGMzODlhMmIyZDY0MGYyNTpub19zY29yZSJdfX19;", + "_awl=2.1759347461.5-51cc96805a6d7c5c688e4ef811495a68-6763652d75732d6561737431-1;", + "LANG=en_US;", + "LANG_CHANGED=en_US;", + "fullcss-error=section-ee8713325f.min.css;", + "refresh=off;", + "letsGetMikey=enabled;", + "usr_prof_v2=eyJpYyI6Mn0%3D;", + "fullcss-section=section-ee8713325f.min.css;", + "_lr_retry_request=true;", + "__tbc=%7Bkpcd%7DChBtZzdja2p2a2syeTNwM3c2Ego2eWtRMjdJRXB1GjxsRGZ0c1NJZXk5SXB5eEE5U0VyTVl2b2oxVmhIS0tuT3ZMdzV5anJtMWgxZWNwNUhNR3Y3VHdJbTdCaDggAA" + }; + CookieCollection cookieCollection = new CookieCollection(); + for (int index = 0; index < cookies.Count(); index++) + { + String strCookie = cookies[index]; + if (strCookie.EndsWith(";")) strCookie = strCookie.Substring(0, strCookie.Length - 1); + String[] pairs = strCookie.Split('='); + cookieCollection.Add(new Cookie() + { + Name = pairs[0], + Value = pairs[1], + Domain = cookieDomain + }); + + } + return cookieCollection; + } + //******************************************************************************************************************************************************************************************************** // ******************************************************************************** H I S T O R I C A L P R I C I N G Y A H O O ********************************************************************* //******************************************************************************************************************************************************************************************************** diff --git a/MarketData/MarketDataLib/Integration/HttpNetRequest.cs b/MarketData/MarketDataLib/Integration/HttpNetRequest.cs index de7185e..2577830 100755 --- a/MarketData/MarketDataLib/Integration/HttpNetRequest.cs +++ b/MarketData/MarketDataLib/Integration/HttpNetRequest.cs @@ -406,7 +406,7 @@ namespace MarketData.Integration } } - public static HttpNetResponse GetRequestNoEncodingV2(String strRequest,WebProxy webProxy=null) + public static HttpNetResponse GetRequestNoEncodingV2(String strRequest,CookieCollection cookieCollection,Uri uri,WebProxy webProxy=null) { HttpWebResponse webResponse=null; try @@ -424,17 +424,35 @@ namespace MarketData.Integration webRequest.Headers.Add("Accept-Encoding: None"); webRequest.Headers.Add("Accept-Language: en-US"); webRequest.UserAgent = "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"; - webRequest.KeepAlive = true; - try{webResponse = (HttpWebResponse)webRequest.GetResponse();} - catch(WebException webException) + webRequest.Headers.Add("sec-fetch-dest", "document"); + webRequest.Headers.Add("sec-fetch-mode", "navigate"); + webRequest.Headers.Add("sec-fetch-site","none"); + webRequest.Headers.Add("sec-fetch-user","?1"); + webRequest.Headers.Add("upgrade-insecure-requests","1"); + webRequest.Headers.Add("priority","u=0, i"); + webRequest.AllowAutoRedirect = true; + +// webRequest.KeepAlive = true; + if (null != cookieCollection) { - if(IsMovedException(webException)) + CookieContainer cookieContainer = new CookieContainer(); + foreach (Cookie cookie in cookieCollection) { - webRequest=Redirect(webRequest, webException); - webResponse=(HttpWebResponse)webRequest.GetResponse(); + cookieContainer.Add(uri,cookie); } - else throw; + webRequest.CookieContainer = cookieContainer; } + + try { webResponse = (HttpWebResponse)webRequest.GetResponse(); } + catch (WebException webException) + { + if (IsMovedException(webException)) + { + webRequest = Redirect(webRequest, webException); + webResponse = (HttpWebResponse)webRequest.GetResponse(); + } + else throw; + } Stream responseStream = webResponse.GetResponseStream(); while (true) { @@ -574,62 +592,63 @@ namespace MarketData.Integration MDTrace.WriteLine(LogLevel.VERBOSE,"GetRequestNoEncodingV2[LEAVE]"); } } + // I am using this code specifically on the seeking alpha web site. it seems as though seeking alpha has implemeted some user agent based bot prevention mechanism on their website // to prevent scrapers. What I do here is to choose from a set of commonly used user agents and then randomly choose a user agent to code into the request. retry logic up to 3 times // and handling of 404 (not found) - public static HttpNetResponse GetRequestNoEncodingV3(String strRequest,String referer=null) + public static HttpNetResponse GetRequestNoEncodingV3(String strRequest, String referer = null) { - HttpWebResponse webResponse=null; - WebException lastWebException=null; - + HttpWebResponse webResponse = null; + WebException lastWebException = null; + try { - MDTrace.WriteLine(LogLevel.VERBOSE,String.Format("GetRequestNoEncodingV3[ENTER]{0}",strRequest)); - Random random=new Random(); - String[] userAgents= + MDTrace.WriteLine(LogLevel.VERBOSE, String.Format("GetRequestNoEncodingV3[ENTER]{0}", strRequest)); + Random random = new Random(); + String[] userAgents = { "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537." }; - int MAX_RETRIES=5; - int TIMEOUT_BETWEEN_RETRIES=1000; + int MAX_RETRIES = 5; + int TIMEOUT_BETWEEN_RETRIES = 1000; int charCount = 0; byte[] buffer = new byte[8192]; StringBuilder sb = new StringBuilder(); - for(int count=0;count