3 Commits

3 changed files with 244 additions and 201 deletions

View File

@@ -30,93 +30,105 @@ class NewsFeed:
return False
def getItemsInAmericasNewsRoomFeed(self, url):
    """Return the videos listed at ``url``, sorted by ascending feed time.

    The cache file named by VIDEODB_AMERICAS_NEWSROOM_FILENAME (under
    self.pathDb) is returned when isFeedCacheAvailable() accepts it and
    readFeedCache() can read it.  Otherwise the page is downloaded, each
    "article" section is parsed into a video, videos are de-duplicated by
    description, and the sorted list is written back to the cache.

    Returns:
        The list of videos, or None when the HTTP status is not 200.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cachedVideos = self.readFeedCache(cachePathFileName)
            if cachedVideos is not None:
                return cachedVideos
        sections = Sections()
        videos = {}
        response = HttpNetRequest().getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        # Read the body once, while the response is still open; it is only
        # closed in the finally clause below.
        pageText = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex = 0
        # getItemsInSection hands back the next search position; the loop
        # stops once that position is -1.
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime())
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # Close the response on every exit path, including early returns
        # and exceptions raised while parsing.
        if response is not None:
            response.close()
def getItemsInOutnumberedFeed(self, url):
    """Return the videos listed at ``url``, sorted by descending feed time.

    The cache file named by VIDEODB_OUTNUMBERED_FILENAME (under
    self.pathDb) is returned when isFeedCacheAvailable() accepts it and
    readFeedCache() can read it.  Otherwise the page is downloaded, each
    "article" section is parsed into a video, videos are de-duplicated by
    description, and the sorted list is written back to the cache.

    Returns:
        The list of videos, or None when the HTTP status is not 200.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cachedVideos = self.readFeedCache(cachePathFileName)
            if cachedVideos is not None:
                return cachedVideos
        sections = Sections()
        videos = {}
        response = HttpNetRequest().getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        # Read the body once, while the response is still open; it is only
        # closed in the finally clause below.
        pageText = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex = 0
        # getItemsInSection hands back the next search position; the loop
        # stops once that position is -1.
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # Close the response on every exit path, including early returns
        # and exceptions raised while parsing.
        if response is not None:
            response.close()
def getItemsInFeed(self, url):
    """Return the recent videos listed at ``url``, newest feed time first.

    The cache file named by VIDEODB_FILENAME (under self.pathDb) is
    returned when isFeedCacheAvailable() accepts it and readFeedCache() can
    read it.  Otherwise the page is downloaded, each "article" section is
    parsed into a video, videos are de-duplicated by description, passed
    through filterFeedMaxDays() with FEED_REJECT_IF_OLDER_THAN_DAYS, sorted,
    and written back to the cache.

    Returns:
        The list of videos, or None when the HTTP status is not 200.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            self.writeLog(f"Loading videos from cache {cachePathFileName}")
            cachedVideos = self.readFeedCache(cachePathFileName)
            if cachedVideos is not None:
                return cachedVideos
        sections = Sections()
        videos = {}
        self.writeLog(f"Loading videos from {url}")
        response = HttpNetRequest().getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        # Read the body once, while the response is still open; it is only
        # closed in the finally clause below.
        pageText = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex = 0
        # getItemsInSection hands back the next search position; the loop
        # stops once that position is -1.
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        recentVideos = self.filterFeedMaxDays(list(videos.values()), FEED_REJECT_IF_OLDER_THAN_DAYS)
        videoList = sorted(recentVideos, key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # Close the response on every exit path, including early returns
        # and exceptions raised while parsing.
        if response is not None:
            response.close()
def filterFeedMaxDays(self, videos, days):
now = datetime.now()
@@ -126,89 +138,97 @@ class NewsFeed:
if delta.days <= days:
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
self.writeLog(message)
filteredList.insert(0,video)
filteredList.append(video)
else:
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
self.writeLog(message)
return filteredList
def getUSItemsInFeed(self, url):
    """Return the videos referenced from ``url``, newest feed time first.

    The cache file named by VIDEODB_US_FILENAME (under self.pathDb) is
    returned when isFeedCacheAvailable() accepts it and readFeedCache() can
    read it.  Otherwise the listing page is downloaded, a video id is
    extracted from each "article" section, and each video's own page
    (https://video.foxnews.com/v/<id>) is fetched and parsed.  Videos are
    de-duplicated by description, sorted, and written back to the cache.

    Returns:
        The list of videos, or None when the listing page's HTTP status is
        not 200.  Individual video pages that do not answer 200 are skipped.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_US_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cachedVideos = self.readFeedCache(cachePathFileName)
            if cachedVideos is not None:
                return cachedVideos
        sections = Sections()
        videos = {}
        response = HttpNetRequest().getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        # Read the body once, while the response is still open; it is only
        # closed in the finally clause below.
        pageText = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex = 0
        # getVideoIdInSection hands back the next search position; the loop
        # stops once that position is -1.
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
            if videoId is None:
                continue
            # A separate name keeps the caller's listing url intact.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code != 200:
                    continue
                # Capture the body before the per-video response is closed.
                videoText = innerResponse.text
            finally:
                innerResponse.close()
            video = sections.getVideoContentInSection(videoText)
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # Close the listing response on every exit path.
        if response is not None:
            response.close()
def getExclusiveItemsInFeed(self, url):
    """Return the videos referenced from ``url``, newest feed time first.

    The cache file named by VIDEODB_EXCLUSIVE_FILENAME (under self.pathDb)
    is returned when isFeedCacheAvailable() accepts it and readFeedCache()
    can read it.  Otherwise the listing page is downloaded, a video id is
    extracted from each "article" section, and each video's own page
    (https://video.foxnews.com/v/<id>) is fetched and parsed.  Videos are
    de-duplicated by description, sorted, and written back to the cache.

    Returns:
        The list of videos, or None when the listing page's HTTP status is
        not 200.  Individual video pages that do not answer 200 are skipped.
    """
    response = None
    try:
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cachedVideos = self.readFeedCache(cachePathFileName)
            if cachedVideos is not None:
                return cachedVideos
        sections = Sections()
        videos = {}
        response = HttpNetRequest().getHttpNetRequest(url)
        if response.status_code != 200:
            return None
        # Read the body once, while the response is still open; it is only
        # closed in the finally clause below.
        pageText = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(pageText)
        searchIndex = 0
        # getVideoIdInSection hands back the next search position; the loop
        # stops once that position is -1.
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
            if videoId is None:
                continue
            # A separate name keeps the caller's listing url intact.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
            try:
                if innerResponse.status_code != 200:
                    continue
                # Capture the body before the per-video response is closed.
                videoText = innerResponse.text
            finally:
                innerResponse.close()
            video = sections.getVideoContentInSection(videoText)
            if video is not None and video.description not in videos:
                videos[video.description] = video
                video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
    finally:
        # Close the listing response on every exit path.
        if response is not None:
            response.close()
def getItemsInArchiveFeed(self,url,archiveDbFileName):
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
@@ -220,11 +240,11 @@ class NewsFeed:
def readFeedCache(self,pathFileName):
try:
videos=[]
# 'with' will automatically close the stream
with open(pathFileName,"r",encoding='utf-8') as inputStream:
for line in inputStream:
video=Video.fromString(line)
videos.append(video)
inputStream.close()
return(videos)
except:
self.writeLog(traceback.format_exc())
@@ -235,35 +255,36 @@ class NewsFeed:
with open(pathFileName,"w",encoding='utf-8') as outputStream:
for video in videos:
outputStream.write(video.toString()+"\n")
outputStream.close()
# 'with' will automatically close the stream
return(videos)
except:
self.writeLog(traceback.format_exc())
return(videos)
def isFeedCacheAvailable(self, pathFileName, expireMinutes):
    """Report whether the cache file exists and is still fresh enough to use.

    The file's age is measured from its modification time.  The cache is
    treated as expired when it is two or more whole hours old, or when its
    total age in minutes exceeds ``expireMinutes``; an expired file is
    handed to self.archiveFile() before False is returned.

    Args:
        pathFileName: Path of the cache file to inspect.
        expireMinutes: Maximum accepted age of the file, in minutes.

    Returns:
        True when the file exists and has not expired; False when it is
        missing, expired, or any error occurs while inspecting it (the
        traceback is written to the log in that case).
    """
    try:
        self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
        if not os.path.isfile(pathFileName):
            return False
        # Both values are seconds since the epoch, so the difference is the
        # true elapsed time, including whole days.
        ageSeconds = int(time.time() - os.path.getmtime(pathFileName))
        totalMinutes = ageSeconds // 60
        hours, minutes = divmod(totalMinutes, 60)
        self.writeLog('file is = "{age}" hours old'.format(age=hours))
        self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
        # Compare the TOTAL minutes, not the remainder within the hour:
        # otherwise a file aged 1h02m would pass a 5 minute expiry.
        if hours > 1 or totalMinutes > expireMinutes:
            self.archiveFile(pathFileName)
            return False
        return True
    except Exception:
        # Best effort: any failure while inspecting the file means
        # "no usable cache" rather than a crash.
        self.writeLog(traceback.format_exc())
        return False
def archiveFile(self, pathFileName):
if not os.path.isfile(pathFileName):
return(False)
@@ -316,25 +337,35 @@ class Sections:
if "tokenvod" in previewUrl:
return video, searchIndex
indexDescription=strContainingString.index("alt=\"")
# Handle video description
indexDescription=strContainingString.find("alt=\"")
if -1 == indexDescription:
return video, searchIndex
description=strContainingString[indexDescription:]
description=self.betweenString(description,'"','"')
description=self.removeHtml(description)
description=description.replace("- Fox News","")
if "vod.foxbusiness" in description:
return video, searchIndex
indexDuration=strContainingString.index("<div class=\"duration\">")
# Handle video duration
indexDuration=strContainingString.find("<div class=\"duration\">")
if -1 != indexDuration:
strDuration=strContainingString[indexDuration:]
strDuration=self.betweenString(strDuration,">","<")
description=description+" - "+strDuration
indexPublication=strContainingString.index("<div class=\"pub-date\">")
# Handle video publication
strPublication = ""
indexPublication=strContainingString.find("<div class=\"pub-date\">")
if -1 != indexPublication:
strPublication=strContainingString[indexPublication:]
strPublication=self.betweenString(strPublication,"<time>","</time>")
description=description+" ("+strPublication+")"
# Handle the icon
icon=None
indexIcon=strContainingString.index("srcset=")
indexIcon=strContainingString.find("srcset=")
if -1 != indexIcon:
icon=strContainingString[indexIcon:]
icon=self.betweenString(icon,"\"","\"")
@@ -421,12 +452,14 @@ class Sections:
for code in codes:
strItem=strItem.replace(code,"'")
strItem=strItem.replace("&amp;","&")
strItem=strItem.replace("&#x2018;","'")
strItem=strItem.replace("&#x2019;","'")
strItem=strItem.replace("&#x2018;","")
strItem=strItem.replace("&#x2019;","")
strItem=strItem.replace("&#x2014;","-")
strItem=strItem.replace("&#39;","'")
strItem=strItem.replace("???","'")
strItem=strItem.replace("&quot;","\"")
return strItem
def pad(str,filler,length):
stringLength=len(str)
sb=""

View File

@@ -36,29 +36,34 @@ class StringHelper:
def betweenString(strItem, strBegin, strEnd):
    """Return the part of ``strItem`` between ``strBegin`` and ``strEnd``.

    Args:
        strItem: Text to search; None yields None.
        strBegin: Start marker.  None means "start at the beginning of
            strItem".  A marker of the form "<...>" is treated as an opening
            tag: the text after its "<name" part up to the next ">" is
            skipped, so "<time>" also matches '<time datetime="...">'.
        strEnd: End marker, searched for after the start marker.  None means
            "up to the end of strItem".

    Returns:
        The text between the two markers (markers excluded), or None when
        strItem is None or either marker cannot be found.
    """
    import re  # local import: only the tag-matching branch needs it

    if strItem is None:
        return None
    if strBegin is None:
        remainder = strItem
    elif strBegin.startswith("<") and strBegin.endswith(">"):
        # Require a boundary after the tag name so that "<time>" does not
        # match "<timer>"; attributes after the name are still allowed.
        tagOpen = re.search(re.escape(strBegin[:-1]) + r"(?=[\s/>])", strItem)
        if tagOpen is None:
            return None
        tagClose = strItem.find(">", tagOpen.end())
        if tagClose == -1:
            return None
        remainder = strItem[tagClose + 1:]
    else:
        beginIndex = strItem.find(strBegin)
        if beginIndex == -1:
            return None
        remainder = strItem[beginIndex + len(strBegin):]
    if strEnd is None:
        return remainder
    endIndex = remainder.find(strEnd)
    if endIndex == -1:
        return None
    return remainder[:endIndex]
class HttpNetRequest:
def __init__(self):

View File

@@ -68,7 +68,12 @@ class Video:
description=splits[0].strip()
url=splits[1].strip()
icon=splits[2].strip()
timestamp=DateTime(splits[3].strip())
datePart = splits[3].strip()
timestamp = DateTime.getCurrentTime()
try :
timestamp=DateTime(datePart)
except Exception as exception:
print(f"Encountered invalid date '{datePart}'")
return(Video(description,url,icon,timestamp))
@staticmethod