Reviewed with Anthropic Claude and made bug fixes.

This commit is contained in:
2026-02-22 11:45:01 -05:00
parent 13b18b01dd
commit e660e385e5

View File

@@ -30,93 +30,105 @@ class NewsFeed:
return False return False
def getItemsInAmericasNewsRoomFeed(self,url): def getItemsInAmericasNewsRoomFeed(self,url):
now=datetime.now() response = None
cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb) try:
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): now=datetime.now()
videos=self.readFeedCache(cachePathFileName) cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb)
if videos is not None: if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
return(videos) videos=self.readFeedCache(cachePathFileName)
sections=Sections() if videos is not None:
videos = {} return(videos)
httpNetRequest=HttpNetRequest() sections=Sections()
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) videos = {}
status=response.status_code httpNetRequest=HttpNetRequest()
searchIndex=0 response=httpNetRequest.getHttpNetRequest(url)
response.close() status=response.status_code
if status!=200: searchIndex=0
return None if status!=200:
if LOG_HTTP_RESPONSES: return None
self.writeLog(url) if LOG_HTTP_RESPONSES:
self.writeLog(response.text) self.writeLog(url)
while -1!= searchIndex: self.writeLog(response.text)
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) while -1!= searchIndex:
if video is not None and not (video.description in videos): video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
videos[video.description]=video if video is not None and not (video.description in videos):
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) videos[video.description]=video
videoList=list(videos.values()) video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False) videoList=list(videos.values())
self.writeFeedCache(cachePathFileName,videoList) videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False)
return (videoList) self.writeFeedCache(cachePathFileName,videoList)
return (videoList)
finally:
if None!= response:
response.close()
def getItemsInOutnumberedFeed(self,url): def getItemsInOutnumberedFeed(self,url):
now=datetime.now() response = None
cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb) try:
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): now=datetime.now()
videos=self.readFeedCache(cachePathFileName) cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb)
if videos is not None: if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
return(videos) videos=self.readFeedCache(cachePathFileName)
sections=Sections() if videos is not None:
videos = {} return(videos)
httpNetRequest=HttpNetRequest() sections=Sections()
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) videos = {}
status=response.status_code httpNetRequest=HttpNetRequest()
searchIndex=0 response=httpNetRequest.getHttpNetRequest(url)
response.close() status=response.status_code
if status!=200: searchIndex=0
return None if status!=200:
if LOG_HTTP_RESPONSES: return None
self.writeLog(url) if LOG_HTTP_RESPONSES:
self.writeLog(response.text) self.writeLog(url)
while -1!= searchIndex: self.writeLog(response.text)
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) while -1!= searchIndex:
if video is not None and not (video.description in videos): video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
videos[video.description]=video if video is not None and not (video.description in videos):
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) videos[video.description]=video
videoList=list(videos.values()) video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) videoList=list(videos.values())
self.writeFeedCache(cachePathFileName,videoList) videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
return (videoList) self.writeFeedCache(cachePathFileName,videoList)
return (videoList)
finally:
if None!=response:
response.close()
def getItemsInFeed(self,url): def getItemsInFeed(self,url):
now=datetime.now() response = None
cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb) try:
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): now=datetime.now()
self.writeLog(f"Loading videos from cache {cachePathFileName}") cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb)
videos=self.readFeedCache(cachePathFileName) if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
if videos is not None: self.writeLog(f"Loading videos from cache {cachePathFileName}")
return(videos) videos=self.readFeedCache(cachePathFileName)
sections=Sections() if videos is not None:
videos = {} return(videos)
httpNetRequest=HttpNetRequest() sections=Sections()
self.writeLog(f"Loading videos from {url}") videos = {}
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) httpNetRequest=HttpNetRequest()
status=response.status_code self.writeLog(f"Loading videos from {url}")
searchIndex=0 response=httpNetRequest.getHttpNetRequest(url)
response.close() status=response.status_code
if status!=200: searchIndex=0
return None if status!=200:
if LOG_HTTP_RESPONSES: return None
self.writeLog(url) if LOG_HTTP_RESPONSES:
self.writeLog(response.text) self.writeLog(url)
while -1!= searchIndex: self.writeLog(response.text)
video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex) while -1!= searchIndex:
if video is not None and not (video.description in videos): video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex)
videos[video.description]=video if video is not None and not (video.description in videos):
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) videos[video.description]=video
videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS) video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS)
self.writeFeedCache(cachePathFileName,videoList) videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
return (videoList) self.writeFeedCache(cachePathFileName,videoList)
return (videoList)
finally:
if None!=response:
response.close()
def filterFeedMaxDays(self, videos, days): def filterFeedMaxDays(self, videos, days):
now = datetime.now() now = datetime.now()
@@ -126,89 +138,97 @@ class NewsFeed:
if delta.days <= days: if delta.days <= days:
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}" message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
self.writeLog(message) self.writeLog(message)
filteredList.insert(0,video) filteredList.append(video)
else: else:
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}" message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
self.writeLog(message) self.writeLog(message)
return filteredList return filteredList
def getUSItemsInFeed(self,url): def getUSItemsInFeed(self,url):
now=datetime.now() response = None
cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb) try:
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): now=datetime.now()
videos=self.readFeedCache(cachePathFileName) cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb)
if videos is not None: if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
return(videos) videos=self.readFeedCache(cachePathFileName)
sections=Sections() if videos is not None:
videos = {} return(videos)
httpNetRequest=HttpNetRequest() sections=Sections()
response=httpNetRequest.getHttpNetRequest(url) videos = {}
status=response.status_code
searchIndex=0
response.close()
if status!=200:
return None
if LOG_HTTP_RESPONSES:
self.writeLog(url)
self.writeLog(response.text)
while -1!= searchIndex:
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
if videoId is None:
continue
url='https://video.foxnews.com/v/'+videoId
httpNetRequest=HttpNetRequest() httpNetRequest=HttpNetRequest()
innerResponse=httpNetRequest.getHttpNetRequest(url) response=httpNetRequest.getHttpNetRequest(url)
status=innerResponse.status_code status=response.status_code
innerResponse.close() searchIndex=0
if status!=200: if status!=200:
continue return None
video=sections.getVideoContentInSection(innerResponse.text) if LOG_HTTP_RESPONSES:
if video is not None and not (video.description in videos): self.writeLog(url)
videos[video.description]=video self.writeLog(response.text)
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) while -1!= searchIndex:
videoList=list(videos.values()) videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) if videoId is None:
self.writeFeedCache(cachePathFileName,videoList) continue
return (videoList) videoUrl='https://video.foxnews.com/v/'+videoId
httpNetRequest=HttpNetRequest()
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
status=innerResponse.status_code
innerResponse.close()
if status!=200:
continue
video=sections.getVideoContentInSection(innerResponse.text)
if video is not None and not (video.description in videos):
videos[video.description]=video
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
videoList=list(videos.values())
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
self.writeFeedCache(cachePathFileName,videoList)
return (videoList)
finally:
if None!=response:
response.close()
def getExclusiveItemsInFeed(self,url): def getExclusiveItemsInFeed(self,url):
now=datetime.now() response = None
cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb) try:
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): now=datetime.now()
videos=self.readFeedCache(cachePathFileName) cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb)
if videos is not None: if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
return(videos) videos=self.readFeedCache(cachePathFileName)
sections=Sections() if videos is not None:
videos = {} return(videos)
httpNetRequest=HttpNetRequest() sections=Sections()
response=httpNetRequest.getHttpNetRequest(url) videos = {}
status=response.status_code
searchIndex=0
response.close()
if status!=200:
return None
if LOG_HTTP_RESPONSES:
self.writeLog(url)
self.writeLog(response.Text)
while -1!= searchIndex:
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
if videoId is None:
continue
url='https://video.foxnews.com/v/'+videoId
httpNetRequest=HttpNetRequest() httpNetRequest=HttpNetRequest()
innerResponse=httpNetRequest.getHttpNetRequest(url) response=httpNetRequest.getHttpNetRequest(url)
status=innerResponse.status_code status=response.status_code
innerResponse.close() searchIndex=0
if status!=200: if status!=200:
continue return None
video=sections.getVideoContentInSection(innerResponse.text) if LOG_HTTP_RESPONSES:
if video is not None and not (video.description in videos): self.writeLog(url)
videos[video.description]=video self.writeLog(response.text)
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) while -1!= searchIndex:
videoList=list(videos.values()) videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) if videoId is None:
self.writeFeedCache(cachePathFileName,videoList) continue
return (videoList) videoUrl='https://video.foxnews.com/v/'+videoId
httpNetRequest=HttpNetRequest()
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
status=innerResponse.status_code
innerResponse.close()
if status!=200:
continue
video=sections.getVideoContentInSection(innerResponse.text)
if video is not None and not (video.description in videos):
videos[video.description]=video
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
videoList=list(videos.values())
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
self.writeFeedCache(cachePathFileName,videoList)
return (videoList)
finally:
if None!=response:
response.close()
def getItemsInArchiveFeed(self,url,archiveDbFileName): def getItemsInArchiveFeed(self,url,archiveDbFileName):
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb) cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
@@ -220,11 +240,11 @@ class NewsFeed:
def readFeedCache(self,pathFileName): def readFeedCache(self,pathFileName):
try: try:
videos=[] videos=[]
# 'with' will automatically close the stream
with open(pathFileName,"r",encoding='utf-8') as inputStream: with open(pathFileName,"r",encoding='utf-8') as inputStream:
for line in inputStream: for line in inputStream:
video=Video.fromString(line) video=Video.fromString(line)
videos.append(video) videos.append(video)
inputStream.close()
return(videos) return(videos)
except: except:
self.writeLog(traceback.format_exc()) self.writeLog(traceback.format_exc())
@@ -235,34 +255,35 @@ class NewsFeed:
with open(pathFileName,"w",encoding='utf-8') as outputStream: with open(pathFileName,"w",encoding='utf-8') as outputStream:
for video in videos: for video in videos:
outputStream.write(video.toString()+"\n") outputStream.write(video.toString()+"\n")
outputStream.close() # 'with' will automatically close the stream
return(videos) return(videos)
except: except:
self.writeLog(traceback.format_exc()) self.writeLog(traceback.format_exc())
return(videos) return(videos)
def isFeedCacheAvailable(self,pathFileName,expireMinutes): def isFeedCacheAvailable(self, pathFileName, expireMinutes):
try: try:
self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName)) self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
if not os.path.isfile(pathFileName): if not os.path.isfile(pathFileName):
return(False) return False
modifiedTime=os.path.getmtime(pathFileName) modifiedTime = os.path.getmtime(pathFileName)
convertTime=time.localtime(modifiedTime) convertTime = time.localtime(modifiedTime)
formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime) formatTime = time.strftime('%d%m%Y %H:%M:%S', convertTime)
fileDateTime=time.strptime(formatTime,'%d%m%Y %H:%M:%S') fileDateTime = time.strptime(formatTime, '%d%m%Y %H:%M:%S')
currentTime=datetime.now() currentTime = datetime.now()
timedelta=currentTime-datetime(*(fileDateTime[0:6])) elapsed = currentTime - datetime(*(fileDateTime[0:6]))
hours, hremainder = divmod(timedelta.seconds,3600) totalSeconds = int(elapsed.total_seconds())
minutes, mremainder = divmod(timedelta.seconds,60) hours, remainder = divmod(totalSeconds, 3600)
self.writeLog('file is = "{age}" hours old'.format(age=hours)) minutes, _ = divmod(remainder, 60)
self.writeLog('file is = "{age}" minutes old'.format(age=minutes)) self.writeLog('file is = "{age}" hours old'.format(age=hours))
if hours > 1 or minutes > expireMinutes: self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
self.archiveFile(pathFileName) if hours > 1 or minutes > expireMinutes:
return(False) self.archiveFile(pathFileName)
return (True) return False
return True
except: except:
self.writeLog(traceback.format_exc()); self.writeLog(traceback.format_exc())
return(False) return False
def archiveFile(self, pathFileName): def archiveFile(self, pathFileName):
if not os.path.isfile(pathFileName): if not os.path.isfile(pathFileName):
@@ -316,25 +337,35 @@ class Sections:
if "tokenvod" in previewUrl: if "tokenvod" in previewUrl:
return video, searchIndex return video, searchIndex
indexDescription=strContainingString.index("alt=\"") # Handle video description
indexDescription=strContainingString.find("alt=\"")
if -1 == indexDescription:
return video, searchIndex
description=strContainingString[indexDescription:] description=strContainingString[indexDescription:]
description=self.betweenString(description,'"','"') description=self.betweenString(description,'"','"')
description=self.removeHtml(description) description=self.removeHtml(description)
description=description.replace("- Fox News","") description=description.replace("- Fox News","")
if "vod.foxbusiness" in description: if "vod.foxbusiness" in description:
return video, searchIndex return video, searchIndex
indexDuration=strContainingString.index("<div class=\"duration\">")
# Handle video duration
indexDuration=strContainingString.find("<div class=\"duration\">")
if -1 != indexDuration: if -1 != indexDuration:
strDuration=strContainingString[indexDuration:] strDuration=strContainingString[indexDuration:]
strDuration=self.betweenString(strDuration,">","<") strDuration=self.betweenString(strDuration,">","<")
description=description+" - "+strDuration description=description+" - "+strDuration
indexPublication=strContainingString.index("<div class=\"pub-date\">")
# Handle video publication
strPublication = ""
indexPublication=strContainingString.find("<div class=\"pub-date\">")
if -1 != indexPublication: if -1 != indexPublication:
strPublication=strContainingString[indexPublication:] strPublication=strContainingString[indexPublication:]
strPublication=self.betweenString(strPublication,"<time>","</time>") strPublication=self.betweenString(strPublication,"<time>","</time>")
description=description+" ("+strPublication+")" description=description+" ("+strPublication+")"
# Handle the icon
icon=None icon=None
indexIcon=strContainingString.index("srcset=") indexIcon=strContainingString.find("srcset=")
if -1 != indexIcon: if -1 != indexIcon:
icon=strContainingString[indexIcon:] icon=strContainingString[indexIcon:]
icon=self.betweenString(icon,"\"","\"") icon=self.betweenString(icon,"\"","\"")