Reviewed with Anthropic Claude and made bug fixes.
This commit is contained in:
393
newsfeed.py
393
newsfeed.py
@@ -30,93 +30,105 @@ class NewsFeed:
|
||||
return False
|
||||
|
||||
def getItemsInAmericasNewsRoomFeed(self,url):
    """Return the videos parsed from ``url``, sorted by feed time ascending.

    The list cached under VIDEODB_AMERICAS_NEWSROOM_FILENAME is returned
    instead when ``isFeedCacheAvailable`` reports it usable and it can be
    read back. Otherwise the page is downloaded, every "article" section is
    parsed into a video (de-duplicated on ``video.description``), and the
    sorted result is written to the cache before being returned.

    Returns None when the HTTP status is not 200.
    """
    response = None
    try:
        now=datetime.now()
        cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
            videos=self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos
        sections=Sections()
        videos = {}
        httpNetRequest=HttpNetRequest()
        response=httpNetRequest.getHttpNetRequest(url)
        if response.status_code!=200:
            return None
        # Read the body once, while the response is still open.
        # NOTE(review): presumably a requests-style response whose .text is
        # re-decoded on every access - confirm against HttpNetRequest.
        pageText=response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex=0
        # getItemsInSection hands back the next search position; -1 ends the scan.
        while -1!=searchIndex:
            video, searchIndex = sections.getItemsInSection(pageText,"article",searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description]=video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
        videoList=sorted(videos.values(), key=lambda x:x.getFeedTime(),reverse=False)
        self.writeFeedCache(cachePathFileName,videoList)
        return videoList
    finally:
        # Runs on every exit path, including the early returns above.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInOutnumberedFeed(self,url):
    """Return the videos parsed from ``url``, sorted by feed time descending.

    The list cached under VIDEODB_OUTNUMBERED_FILENAME is returned instead
    when ``isFeedCacheAvailable`` reports it usable and it can be read back.
    Otherwise the page is downloaded, every "article" section is parsed into
    a video (de-duplicated on ``video.description``), and the sorted result
    is written to the cache before being returned.

    Returns None when the HTTP status is not 200.
    """
    response = None
    try:
        now=datetime.now()
        cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
            videos=self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos
        sections=Sections()
        videos = {}
        httpNetRequest=HttpNetRequest()
        response=httpNetRequest.getHttpNetRequest(url)
        if response.status_code!=200:
            return None
        # Read the body once, while the response is still open.
        # NOTE(review): presumably a requests-style response whose .text is
        # re-decoded on every access - confirm against HttpNetRequest.
        pageText=response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex=0
        # getItemsInSection hands back the next search position; -1 ends the scan.
        while -1!=searchIndex:
            video, searchIndex = sections.getItemsInSection(pageText,"article",searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description]=video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
        videoList=sorted(videos.values(), key=lambda x:x.getFeedTime(),reverse=True)
        self.writeFeedCache(cachePathFileName,videoList)
        return videoList
    finally:
        # Runs on every exit path, including the early returns above.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInFeed(self,url):
    """Return the recent videos parsed from ``url``, sorted by feed time descending.

    The list cached under VIDEODB_FILENAME is returned instead when
    ``isFeedCacheAvailable`` reports it usable and it can be read back.
    Otherwise the page is downloaded, every "article" section is parsed into
    a video (de-duplicated on ``video.description``), the videos are passed
    through ``filterFeedMaxDays`` with FEED_REJECT_IF_OLDER_THAN_DAYS, and
    the sorted result is written to the cache before being returned.

    Returns None when the HTTP status is not 200.
    """
    response = None
    try:
        now=datetime.now()
        cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
            self.writeLog(f"Loading videos from cache {cachePathFileName}")
            videos=self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos
        sections=Sections()
        videos = {}
        httpNetRequest=HttpNetRequest()
        self.writeLog(f"Loading videos from {url}")
        response=httpNetRequest.getHttpNetRequest(url)
        if response.status_code!=200:
            return None
        # Read the body once, while the response is still open.
        # NOTE(review): presumably a requests-style response whose .text is
        # re-decoded on every access - confirm against HttpNetRequest.
        pageText=response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex=0
        # getItemsInSection hands back the next search position; -1 ends the scan.
        while -1!=searchIndex:
            video, searchIndex = sections.getItemsInSection(pageText,"article",searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description]=video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
        videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS)
        videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
        self.writeFeedCache(cachePathFileName,videoList)
        return videoList
    finally:
        # Runs on every exit path, including the early returns above.
        if response is not None:
            response.close()
|
||||
|
||||
def filterFeedMaxDays(self, videos, days):
|
||||
now = datetime.now()
|
||||
@@ -126,89 +138,97 @@ class NewsFeed:
|
||||
if delta.days <= days:
|
||||
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
filteredList.insert(0,video)
|
||||
filteredList.append(video)
|
||||
else:
|
||||
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
return filteredList
|
||||
|
||||
def getUSItemsInFeed(self,url):
    """Return the videos referenced by ``url``, sorted by feed time descending.

    The list cached under VIDEODB_US_FILENAME is returned instead when
    ``isFeedCacheAvailable`` reports it usable and it can be read back.
    Otherwise the index page at ``url`` is downloaded and scanned for video
    ids in its "article" sections; each id is fetched from
    https://video.foxnews.com/v/<id> and parsed into a video (de-duplicated
    on ``video.description``). Video pages that do not answer with HTTP 200
    are skipped. The sorted result is written to the cache before being
    returned.

    Returns None when the index page's HTTP status is not 200.
    """
    response = None
    try:
        now=datetime.now()
        cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
            videos=self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos
        sections=Sections()
        videos = {}
        httpNetRequest=HttpNetRequest()
        response=httpNetRequest.getHttpNetRequest(url)
        if response.status_code!=200:
            return None
        # Read the index body once, while the response is still open.
        # NOTE(review): presumably a requests-style response whose .text is
        # re-decoded on every access - confirm against HttpNetRequest.
        pageText=response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex=0
        # getVideoIdInSection hands back the next search position; -1 ends the scan.
        while -1!=searchIndex:
            videoId, searchIndex = sections.getVideoIdInSection(pageText,"article",searchIndex)
            if videoId is None:
                continue
            videoUrl='https://video.foxnews.com/v/'+videoId
            innerResponse=HttpNetRequest().getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code!=200:
                    continue
                # Capture the body BEFORE closing; the previous code read
                # innerResponse.text after innerResponse.close().
                videoText=innerResponse.text
            finally:
                innerResponse.close()
            video=sections.getVideoContentInSection(videoText)
            if video is not None and video.description not in videos:
                videos[video.description]=video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
        videoList=sorted(videos.values(), key=lambda x:x.getFeedTime(),reverse=True)
        self.writeFeedCache(cachePathFileName,videoList)
        return videoList
    finally:
        # Runs on every exit path, including the early returns above.
        if response is not None:
            response.close()
|
||||
|
||||
def getExclusiveItemsInFeed(self,url):
    """Return the videos referenced by ``url``, sorted by feed time descending.

    The list cached under VIDEODB_EXCLUSIVE_FILENAME is returned instead when
    ``isFeedCacheAvailable`` reports it usable and it can be read back.
    Otherwise the index page at ``url`` is downloaded and scanned for video
    ids in its "article" sections; each id is fetched from
    https://video.foxnews.com/v/<id> and parsed into a video (de-duplicated
    on ``video.description``). Video pages that do not answer with HTTP 200
    are skipped. The sorted result is written to the cache before being
    returned.

    Returns None when the index page's HTTP status is not 200.
    """
    response = None
    try:
        now=datetime.now()
        cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
            videos=self.readFeedCache(cachePathFileName)
            if videos is not None:
                return videos
        sections=Sections()
        videos = {}
        httpNetRequest=HttpNetRequest()
        response=httpNetRequest.getHttpNetRequest(url)
        if response.status_code!=200:
            return None
        # Read the index body once, while the response is still open.
        # NOTE(review): presumably a requests-style response whose .text is
        # re-decoded on every access - confirm against HttpNetRequest.
        pageText=response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex=0
        # getVideoIdInSection hands back the next search position; -1 ends the scan.
        while -1!=searchIndex:
            videoId, searchIndex = sections.getVideoIdInSection(pageText,"article",searchIndex)
            if videoId is None:
                continue
            videoUrl='https://video.foxnews.com/v/'+videoId
            innerResponse=HttpNetRequest().getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code!=200:
                    continue
                # Capture the body BEFORE closing; the previous code read
                # innerResponse.text after innerResponse.close().
                videoText=innerResponse.text
            finally:
                innerResponse.close()
            video=sections.getVideoContentInSection(videoText)
            if video is not None and video.description not in videos:
                videos[video.description]=video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
        videoList=sorted(videos.values(), key=lambda x:x.getFeedTime(),reverse=True)
        self.writeFeedCache(cachePathFileName,videoList)
        return videoList
    finally:
        # Runs on every exit path, including the early returns above.
        if response is not None:
            response.close()
|
||||
|
||||
def getItemsInArchiveFeed(self,url,archiveDbFileName):
|
||||
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
|
||||
@@ -220,11 +240,11 @@ class NewsFeed:
|
||||
def readFeedCache(self,pathFileName):
|
||||
try:
|
||||
videos=[]
|
||||
# 'with' will automatically close the stream
|
||||
with open(pathFileName,"r",encoding='utf-8') as inputStream:
|
||||
for line in inputStream:
|
||||
video=Video.fromString(line)
|
||||
videos.append(video)
|
||||
inputStream.close()
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
@@ -235,35 +255,36 @@ class NewsFeed:
|
||||
with open(pathFileName,"w",encoding='utf-8') as outputStream:
|
||||
for video in videos:
|
||||
outputStream.write(video.toString()+"\n")
|
||||
outputStream.close()
|
||||
# 'with' will automatically close the stream
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
return(videos)
|
||||
|
||||
def isFeedCacheAvailable(self, pathFileName, expireMinutes):
    """Report whether the cache file exists and is still fresh.

    Args:
        pathFileName: Path of the cache file to inspect.
        expireMinutes: Maximum accepted age of the file, in minutes.

    Returns:
        True when the file exists and is neither more than ``expireMinutes``
        minutes old nor two or more whole hours old. False when the file is
        missing, when it is stale (it is then passed to ``archiveFile``), or
        when any exception occurs (the traceback is logged).
    """
    try:
        self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
        if not os.path.isfile(pathFileName):
            return False
        modifiedTime = os.path.getmtime(pathFileName)
        # Naive local datetimes on both sides, so the subtraction is consistent.
        elapsed = datetime.now() - datetime.fromtimestamp(modifiedTime)
        totalSeconds = int(elapsed.total_seconds())
        hours = totalSeconds // 3600
        # TOTAL minutes of age, not the 0-59 remainder after whole hours:
        # with the remainder, a file 65 minutes old passed a 10 minute expiry.
        minutes = totalSeconds // 60
        self.writeLog('file is = "{age}" hours old'.format(age=hours))
        self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
        if hours > 1 or minutes > expireMinutes:
            self.archiveFile(pathFileName)
            return False
        return True
    except Exception:
        # Best effort: any failure while inspecting the file counts as "no cache".
        self.writeLog(traceback.format_exc())
        return False
|
||||
|
||||
def archiveFile(self, pathFileName):
|
||||
if not os.path.isfile(pathFileName):
|
||||
return(False)
|
||||
@@ -316,25 +337,35 @@ class Sections:
|
||||
if "tokenvod" in previewUrl:
|
||||
return video, searchIndex
|
||||
|
||||
indexDescription=strContainingString.index("alt=\"")
|
||||
# Handle video description
|
||||
indexDescription=strContainingString.find("alt=\"")
|
||||
if -1 == indexDescription:
|
||||
return video, searchIndex
|
||||
description=strContainingString[indexDescription:]
|
||||
description=self.betweenString(description,'"','"')
|
||||
description=self.removeHtml(description)
|
||||
description=description.replace("- Fox News","")
|
||||
if "vod.foxbusiness" in description:
|
||||
return video, searchIndex
|
||||
indexDuration=strContainingString.index("<div class=\"duration\">")
|
||||
|
||||
# Handle video duration
|
||||
indexDuration=strContainingString.find("<div class=\"duration\">")
|
||||
if -1 != indexDuration:
|
||||
strDuration=strContainingString[indexDuration:]
|
||||
strDuration=self.betweenString(strDuration,">","<")
|
||||
description=description+" - "+strDuration
|
||||
indexPublication=strContainingString.index("<div class=\"pub-date\">")
|
||||
|
||||
# Handle video publication
|
||||
strPublication = ""
|
||||
indexPublication=strContainingString.find("<div class=\"pub-date\">")
|
||||
if -1 != indexPublication:
|
||||
strPublication=strContainingString[indexPublication:]
|
||||
strPublication=self.betweenString(strPublication,"<time>","</time>")
|
||||
description=description+" ("+strPublication+")"
|
||||
|
||||
# Handle the icon
|
||||
icon=None
|
||||
indexIcon=strContainingString.index("srcset=")
|
||||
indexIcon=strContainingString.find("srcset=")
|
||||
if -1 != indexIcon:
|
||||
icon=strContainingString[indexIcon:]
|
||||
icon=self.betweenString(icon,"\"","\"")
|
||||
|
||||
Reference in New Issue
Block a user