Commit Latest

commit e92a8e2305
parent 5343f070c6
2025-10-01 23:13:03 -04:00
2 changed files with 138 additions and 43 deletions

@@ -406,7 +406,7 @@ namespace MarketData.Integration
             }
         }
-        public static HttpNetResponse GetRequestNoEncodingV2(String strRequest,WebProxy webProxy=null)
+        public static HttpNetResponse GetRequestNoEncodingV2(String strRequest,CookieCollection cookieCollection,Uri uri,WebProxy webProxy=null)
         {
             HttpWebResponse webResponse=null;
             try
@@ -424,17 +424,35 @@ namespace MarketData.Integration
                 webRequest.Headers.Add("Accept-Encoding: None");
                 webRequest.Headers.Add("Accept-Language: en-US");
                 webRequest.UserAgent = "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36";
-                webRequest.KeepAlive = true;
-                try{webResponse = (HttpWebResponse)webRequest.GetResponse();}
-                catch(WebException webException)
+                webRequest.Headers.Add("sec-fetch-dest", "document");
+                webRequest.Headers.Add("sec-fetch-mode", "navigate");
+                webRequest.Headers.Add("sec-fetch-site","none");
+                webRequest.Headers.Add("sec-fetch-user","?1");
+                webRequest.Headers.Add("upgrade-insecure-requests","1");
+                webRequest.Headers.Add("priority","u=0, i");
+                webRequest.AllowAutoRedirect = true;
+                // webRequest.KeepAlive = true;
+                if (null != cookieCollection)
                 {
-                    if(IsMovedException(webException))
+                    CookieContainer cookieContainer = new CookieContainer();
+                    foreach (Cookie cookie in cookieCollection)
                     {
-                        webRequest=Redirect(webRequest, webException);
-                        webResponse=(HttpWebResponse)webRequest.GetResponse();
+                        cookieContainer.Add(uri,cookie);
                     }
-                    else throw;
+                    webRequest.CookieContainer = cookieContainer;
                 }
+                try { webResponse = (HttpWebResponse)webRequest.GetResponse(); }
+                catch (WebException webException)
+                {
+                    if (IsMovedException(webException))
+                    {
+                        webRequest = Redirect(webRequest, webException);
+                        webResponse = (HttpWebResponse)webRequest.GetResponse();
+                    }
+                    else throw;
+                }
                 Stream responseStream = webResponse.GetResponseStream();
                 while (true)
                 {
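
The new V2 overload takes the cookies plus the Uri they are scoped to, and replays them through a CookieContainer. A minimal sketch of a call site, assuming the method lives on a class named HttpNet (the hosting class name is not shown in this diff) and using placeholder URLs and cookie values:

    using System;
    using System.Net;

    class Example
    {
        static void Main()
        {
            // Uri the cookies are scoped to; the overload adds each cookie to a
            // CookieContainer against this Uri before issuing the request.
            Uri uri = new Uri("https://example.com/");                // placeholder
            CookieCollection cookies = new CookieCollection();
            cookies.Add(new Cookie("session", "abc123"));             // hypothetical cookie

            // HttpNet is a stand-in for whatever class hosts GetRequestNoEncodingV2;
            // webProxy keeps its default of null.
            HttpNetResponse response = HttpNet.GetRequestNoEncodingV2(
                "https://example.com/quote", cookies, uri);
        }
    }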
@@ -574,62 +592,63 @@ namespace MarketData.Integration
                 MDTrace.WriteLine(LogLevel.VERBOSE,"GetRequestNoEncodingV2[LEAVE]");
             }
         }
         // I use this code specifically on the Seeking Alpha website, which appears to have implemented a user-agent-based
         // bot-prevention mechanism to block scrapers. The code below picks a user agent at random from a set of commonly
         // used user agents, codes it into the request, retries up to MAX_RETRIES times, and handles 404 (Not Found).
-        public static HttpNetResponse GetRequestNoEncodingV3(String strRequest,String referer=null)
+        public static HttpNetResponse GetRequestNoEncodingV3(String strRequest, String referer = null)
         {
-            HttpWebResponse webResponse=null;
-            WebException lastWebException=null;
+            HttpWebResponse webResponse = null;
+            WebException lastWebException = null;
             try
             {
-                MDTrace.WriteLine(LogLevel.VERBOSE,String.Format("GetRequestNoEncodingV3[ENTER]{0}",strRequest));
-                Random random=new Random();
-                String[] userAgents=
+                MDTrace.WriteLine(LogLevel.VERBOSE, String.Format("GetRequestNoEncodingV3[ENTER]{0}", strRequest));
+                Random random = new Random();
+                String[] userAgents =
                 {
                     "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537."
                 };
-                int MAX_RETRIES=5;
-                int TIMEOUT_BETWEEN_RETRIES=1000;
+                int MAX_RETRIES = 5;
+                int TIMEOUT_BETWEEN_RETRIES = 1000;
                 int charCount = 0;
                 byte[] buffer = new byte[8192];
                 StringBuilder sb = new StringBuilder();
-                for(int count=0;count<MAX_RETRIES;count++)
+                for (int count = 0; count < MAX_RETRIES; count++)
                 {
-                    CookieContainer cookieContainer=new CookieContainer();
+                    CookieContainer cookieContainer = new CookieContainer();
                     HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(new Uri(strRequest));
-                    webRequest.CookieContainer=cookieContainer;
+                    webRequest.CookieContainer = cookieContainer;
                     webRequest.Timeout = REQUEST_TIMEOUT;
                     webRequest.Headers.Add("Accept-Language: en-US,en;q=0.5");
                     webRequest.Headers.Add("Accept-Encoding: gzip,deflate");
                     webRequest.Accept = "text/html";
-                    String userAgent=userAgents[random.Next(0,userAgents.Length-1)];
-                    userAgent+=random.Next(1,57).ToString();
-                    webRequest.UserAgent=userAgent;
-                    if(null!=referer)webRequest.Referer=referer;
+                    String userAgent = userAgents[random.Next(0, userAgents.Length - 1)];
+                    userAgent += random.Next(1, 57).ToString();
+                    webRequest.UserAgent = userAgent;
+                    if (null != referer) webRequest.Referer = referer;
                     try
                     {
-                        try{webResponse = (HttpWebResponse)webRequest.GetResponse();}
-                        catch(WebException webException)
+                        try { webResponse = (HttpWebResponse)webRequest.GetResponse(); }
+                        catch (WebException webException)
                         {
-                            if(IsMovedException(webException))
+                            if (IsMovedException(webException))
                             {
-                                webRequest=Redirect(webRequest, webException);
-                                webResponse=(HttpWebResponse)webRequest.GetResponse();
+                                webRequest = Redirect(webRequest, webException);
+                                webResponse = (HttpWebResponse)webRequest.GetResponse();
                             }
                             else throw;
                        }
                         Stream responseStream = webResponse.GetResponseStream();
-                        if(webResponse.ContentEncoding.ToLower().Contains("gzip"))
+                        if (webResponse.ContentEncoding.ToLower().Contains("gzip"))
                         {
-                        responseStream = new GZipStream(responseStream, CompressionMode.Decompress);
+                            responseStream = new GZipStream(responseStream, CompressionMode.Decompress);
                             StreamReader reader = new StreamReader(responseStream, Encoding.Default);
                             sb.Append(reader.ReadToEnd());
                             reader.Close();
                         }
-                        else if(webResponse.ContentEncoding.ToLower().Contains("deflate"))
+                        else if (webResponse.ContentEncoding.ToLower().Contains("deflate"))
                         {
                             responseStream = new DeflateStream(responseStream, CompressionMode.Decompress);
                             StreamReader reader = new StreamReader(responseStream, Encoding.Default);
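
The header comment at the top of this hunk describes V3's anti-bot strategy. A standalone sketch of the same idea — randomize the user agent per attempt and retry transient failures with a delay — where FetchWithRetries is illustrative, not the repository's code:

    using System;
    using System.Net;
    using System.Threading;

    class RetrySketch
    {
        static HttpWebResponse FetchWithRetries(string url, int maxRetries = 5, int delayMs = 1000)
        {
            string[] userAgents =
            {
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537."
            };
            Random random = new Random();
            WebException last = null;
            for (int attempt = 0; attempt < maxRetries; attempt++)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                // Pick a base agent at random and append a random minor version so
                // each attempt presents a slightly different user-agent string.
                request.UserAgent = userAgents[random.Next(userAgents.Length)] + random.Next(1, 57);
                try
                {
                    return (HttpWebResponse)request.GetResponse();
                }
                catch (WebException ex)
                {
                    last = ex;                  // remember the failure, back off, retry
                    Thread.Sleep(delayMs);
                }
            }
            throw last;                         // every attempt failed
        }
    }

One nit in the committed code: random.Next(0, userAgents.Length - 1) uses an exclusive upper bound, so it can never select the last array entry; it only behaves correctly here because the array holds a single user agent. The sketch uses random.Next(userAgents.Length) instead.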
@@ -645,33 +664,33 @@ namespace MarketData.Integration
                                 sb.Append(Encoding.ASCII.GetString(buffer, 0, charCount));
                             }
                         }
-                        return new HttpNetResponse(sb.ToString(),strRequest,webResponse,true);
+                        return new HttpNetResponse(sb.ToString(), strRequest, webResponse, true);
                     }
                     catch (WebException webException)
                     {
-                        if(webException.Message.Contains("(404) Not Found"))
+                        if (webException.Message.Contains("(404) Not Found"))
                         {
                             return new HttpNetResponse((HttpWebResponse)webException.Response, strRequest, false, webException.Message);
                         }
-                        lastWebException=webException;
+                        lastWebException = webException;
                         Thread.Sleep(TIMEOUT_BETWEEN_RETRIES);
                     }
                 }
-                MDTrace.WriteLine(LogLevel.DEBUG,String.Format("General failure with {0}",lastWebException.Message));
-                return new HttpNetResponse(webResponse,strRequest,false,lastWebException.Message);
+                MDTrace.WriteLine(LogLevel.DEBUG, String.Format("General failure with {0}", lastWebException.Message));
+                return new HttpNetResponse(webResponse, strRequest, false, lastWebException.Message);
             }
             catch (Exception exception)
             {
-                return new HttpNetResponse(webResponse,strRequest,false,exception.Message);
+                return new HttpNetResponse(webResponse, strRequest, false, exception.Message);
             }
             finally
             {
-                if(null!=webResponse)
+                if (null != webResponse)
                 {
                     webResponse.Close();
                     webResponse.Dispose();
                 }
-                MDTrace.WriteLine(LogLevel.VERBOSE,"GetRequestNoEncodingV3[LEAVE]");
+                MDTrace.WriteLine(LogLevel.VERBOSE, "GetRequestNoEncodingV3[LEAVE]");
             }
         }
         // https://query1.finance.yahoo.com/v7/finance/chart/MIDD?period1=1606172400&period2=1606172400&interval=1d&events=history
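
The period1/period2 parameters in that query are Unix epoch seconds (1606172400 corresponds to 2020-11-23 23:00:00 UTC). A small sketch of how such a URL could be assembled, mirroring the example above where the same timestamp is used for both bounds; BuildChartUrl is illustrative, not a helper from this repository:

    using System;

    class ChartUrlSketch
    {
        // period1 and period2 are Unix epoch seconds; the example URL above
        // passes the same value for both, with interval=1d and events=history.
        static string BuildChartUrl(string symbol, DateTime utc)
        {
            long epoch = (long)(utc - new DateTime(1970, 1, 1)).TotalSeconds;
            return string.Format(
                "https://query1.finance.yahoo.com/v7/finance/chart/{0}" +
                "?period1={1}&period2={1}&interval=1d&events=history",
                symbol, epoch);
        }
    }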