Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 98c37f2204 | | |
| | 4cb76dfb58 | | |
| | e660e385e5 | | |
105
newsfeed.py
105
newsfeed.py
@@ -30,6 +30,8 @@ class NewsFeed:
|
||||
return False
|
||||
|
||||
def getItemsInAmericasNewsRoomFeed(self,url):
|
||||
response = None
|
||||
try:
|
||||
now=datetime.now()
|
||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb)
|
||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||
@@ -39,10 +41,9 @@ class NewsFeed:
|
||||
sections=Sections()
|
||||
videos = {}
|
||||
httpNetRequest=HttpNetRequest()
|
||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
||||
response=httpNetRequest.getHttpNetRequest(url)
|
||||
status=response.status_code
|
||||
searchIndex=0
|
||||
response.close()
|
||||
if status!=200:
|
||||
return None
|
||||
if LOG_HTTP_RESPONSES:
|
||||
@@ -57,8 +58,13 @@ class NewsFeed:
|
||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False)
|
||||
self.writeFeedCache(cachePathFileName,videoList)
|
||||
return (videoList)
|
||||
finally:
|
||||
if None!= response:
|
||||
response.close()
|
||||
|
||||
def getItemsInOutnumberedFeed(self,url):
|
||||
response = None
|
||||
try:
|
||||
now=datetime.now()
|
||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb)
|
||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||
@@ -68,10 +74,9 @@ class NewsFeed:
|
||||
sections=Sections()
|
||||
videos = {}
|
||||
httpNetRequest=HttpNetRequest()
|
||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
||||
response=httpNetRequest.getHttpNetRequest(url)
|
||||
status=response.status_code
|
||||
searchIndex=0
|
||||
response.close()
|
||||
if status!=200:
|
||||
return None
|
||||
if LOG_HTTP_RESPONSES:
|
||||
@@ -86,8 +91,13 @@ class NewsFeed:
|
||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||
self.writeFeedCache(cachePathFileName,videoList)
|
||||
return (videoList)
|
||||
finally:
|
||||
if None!=response:
|
||||
response.close()
|
||||
|
||||
def getItemsInFeed(self,url):
|
||||
response = None
|
||||
try:
|
||||
now=datetime.now()
|
||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb)
|
||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||
@@ -99,10 +109,9 @@ class NewsFeed:
|
||||
videos = {}
|
||||
httpNetRequest=HttpNetRequest()
|
||||
self.writeLog(f"Loading videos from {url}")
|
||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
||||
response=httpNetRequest.getHttpNetRequest(url)
|
||||
status=response.status_code
|
||||
searchIndex=0
|
||||
response.close()
|
||||
if status!=200:
|
||||
return None
|
||||
if LOG_HTTP_RESPONSES:
|
||||
@@ -117,6 +126,9 @@ class NewsFeed:
|
||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||
self.writeFeedCache(cachePathFileName,videoList)
|
||||
return (videoList)
|
||||
finally:
|
||||
if None!=response:
|
||||
response.close()
|
||||
|
||||
def filterFeedMaxDays(self, videos, days):
|
||||
now = datetime.now()
|
||||
@@ -126,13 +138,15 @@ class NewsFeed:
|
||||
if delta.days <= days:
|
||||
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
filteredList.insert(0,video)
|
||||
filteredList.append(video)
|
||||
else:
|
||||
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||
self.writeLog(message)
|
||||
return filteredList
|
||||
|
||||
def getUSItemsInFeed(self,url):
|
||||
response = None
|
||||
try:
|
||||
now=datetime.now()
|
||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb)
|
||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||
@@ -145,7 +159,6 @@ class NewsFeed:
|
||||
response=httpNetRequest.getHttpNetRequest(url)
|
||||
status=response.status_code
|
||||
searchIndex=0
|
||||
response.close()
|
||||
if status!=200:
|
||||
return None
|
||||
if LOG_HTTP_RESPONSES:
|
||||
@@ -155,9 +168,9 @@ class NewsFeed:
|
||||
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
||||
if videoId is None:
|
||||
continue
|
||||
url='https://video.foxnews.com/v/'+videoId
|
||||
videoUrl='https://video.foxnews.com/v/'+videoId
|
||||
httpNetRequest=HttpNetRequest()
|
||||
innerResponse=httpNetRequest.getHttpNetRequest(url)
|
||||
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
|
||||
status=innerResponse.status_code
|
||||
innerResponse.close()
|
||||
if status!=200:
|
||||
@@ -170,8 +183,13 @@ class NewsFeed:
|
||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||
self.writeFeedCache(cachePathFileName,videoList)
|
||||
return (videoList)
|
||||
finally:
|
||||
if None!=response:
|
||||
response.close()
|
||||
|
||||
def getExclusiveItemsInFeed(self,url):
|
||||
response = None
|
||||
try:
|
||||
now=datetime.now()
|
||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb)
|
||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||
@@ -184,19 +202,18 @@ class NewsFeed:
|
||||
response=httpNetRequest.getHttpNetRequest(url)
|
||||
status=response.status_code
|
||||
searchIndex=0
|
||||
response.close()
|
||||
if status!=200:
|
||||
return None
|
||||
if LOG_HTTP_RESPONSES:
|
||||
self.writeLog(url)
|
||||
self.writeLog(response.Text)
|
||||
self.writeLog(response.text)
|
||||
while -1!= searchIndex:
|
||||
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
||||
if videoId is None:
|
||||
continue
|
||||
url='https://video.foxnews.com/v/'+videoId
|
||||
videoUrl='https://video.foxnews.com/v/'+videoId
|
||||
httpNetRequest=HttpNetRequest()
|
||||
innerResponse=httpNetRequest.getHttpNetRequest(url)
|
||||
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
|
||||
status=innerResponse.status_code
|
||||
innerResponse.close()
|
||||
if status!=200:
|
||||
@@ -209,6 +226,9 @@ class NewsFeed:
|
||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||
self.writeFeedCache(cachePathFileName,videoList)
|
||||
return (videoList)
|
||||
finally:
|
||||
if None!=response:
|
||||
response.close()
|
||||
|
||||
def getItemsInArchiveFeed(self,url,archiveDbFileName):
|
||||
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
|
||||
@@ -220,11 +240,11 @@ class NewsFeed:
|
||||
def readFeedCache(self,pathFileName):
|
||||
try:
|
||||
videos=[]
|
||||
# 'with' will automatically close the stream
|
||||
with open(pathFileName,"r",encoding='utf-8') as inputStream:
|
||||
for line in inputStream:
|
||||
video=Video.fromString(line)
|
||||
videos.append(video)
|
||||
inputStream.close()
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
@@ -235,34 +255,35 @@ class NewsFeed:
|
||||
with open(pathFileName,"w",encoding='utf-8') as outputStream:
|
||||
for video in videos:
|
||||
outputStream.write(video.toString()+"\n")
|
||||
outputStream.close()
|
||||
# 'with' will automatically close the stream
|
||||
return(videos)
|
||||
except:
|
||||
self.writeLog(traceback.format_exc())
|
||||
return(videos)
|
||||
|
||||
def isFeedCacheAvailable(self,pathFileName,expireMinutes):
|
||||
def isFeedCacheAvailable(self, pathFileName, expireMinutes):
|
||||
try:
|
||||
self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
|
||||
if not os.path.isfile(pathFileName):
|
||||
return(False)
|
||||
modifiedTime=os.path.getmtime(pathFileName)
|
||||
convertTime=time.localtime(modifiedTime)
|
||||
formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
|
||||
fileDateTime=time.strptime(formatTime,'%d%m%Y %H:%M:%S')
|
||||
currentTime=datetime.now()
|
||||
timedelta=currentTime-datetime(*(fileDateTime[0:6]))
|
||||
hours, hremainder = divmod(timedelta.seconds,3600)
|
||||
minutes, mremainder = divmod(timedelta.seconds,60)
|
||||
return False
|
||||
modifiedTime = os.path.getmtime(pathFileName)
|
||||
convertTime = time.localtime(modifiedTime)
|
||||
formatTime = time.strftime('%d%m%Y %H:%M:%S', convertTime)
|
||||
fileDateTime = time.strptime(formatTime, '%d%m%Y %H:%M:%S')
|
||||
currentTime = datetime.now()
|
||||
elapsed = currentTime - datetime(*(fileDateTime[0:6]))
|
||||
totalSeconds = int(elapsed.total_seconds())
|
||||
hours, remainder = divmod(totalSeconds, 3600)
|
||||
minutes, _ = divmod(remainder, 60)
|
||||
self.writeLog('file is = "{age}" hours old'.format(age=hours))
|
||||
self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
|
||||
if hours > 1 or minutes > expireMinutes:
|
||||
self.archiveFile(pathFileName)
|
||||
return(False)
|
||||
return (True)
|
||||
return False
|
||||
return True
|
||||
except:
|
||||
self.writeLog(traceback.format_exc());
|
||||
return(False)
|
||||
self.writeLog(traceback.format_exc())
|
||||
return False
|
||||
|
||||
def archiveFile(self, pathFileName):
|
||||
if not os.path.isfile(pathFileName):
|
||||
@@ -316,25 +337,35 @@ class Sections:
|
||||
if "tokenvod" in previewUrl:
|
||||
return video, searchIndex
|
||||
|
||||
indexDescription=strContainingString.index("alt=\"")
|
||||
# Handle video description
|
||||
indexDescription=strContainingString.find("alt=\"")
|
||||
if -1 == indexDescription:
|
||||
return video, searchIndex
|
||||
description=strContainingString[indexDescription:]
|
||||
description=self.betweenString(description,'"','"')
|
||||
description=self.removeHtml(description)
|
||||
description=description.replace("- Fox News","")
|
||||
if "vod.foxbusiness" in description:
|
||||
return video, searchIndex
|
||||
indexDuration=strContainingString.index("<div class=\"duration\">")
|
||||
|
||||
# Handle video duration
|
||||
indexDuration=strContainingString.find("<div class=\"duration\">")
|
||||
if -1 != indexDuration:
|
||||
strDuration=strContainingString[indexDuration:]
|
||||
strDuration=self.betweenString(strDuration,">","<")
|
||||
description=description+" - "+strDuration
|
||||
indexPublication=strContainingString.index("<div class=\"pub-date\">")
|
||||
|
||||
# Handle video publication
|
||||
strPublication = ""
|
||||
indexPublication=strContainingString.find("<div class=\"pub-date\">")
|
||||
if -1 != indexPublication:
|
||||
strPublication=strContainingString[indexPublication:]
|
||||
strPublication=self.betweenString(strPublication,"<time>","</time>")
|
||||
description=description+" ("+strPublication+")"
|
||||
|
||||
# Handle the icon
|
||||
icon=None
|
||||
indexIcon=strContainingString.index("srcset=")
|
||||
indexIcon=strContainingString.find("srcset=")
|
||||
if -1 != indexIcon:
|
||||
icon=strContainingString[indexIcon:]
|
||||
icon=self.betweenString(icon,"\"","\"")
|
||||
@@ -421,10 +452,12 @@ class Sections:
|
||||
for code in codes:
|
||||
strItem=strItem.replace(code,"'")
|
||||
strItem=strItem.replace("&amp;","&")
|
||||
strItem=strItem.replace("‘","'")
|
||||
strItem=strItem.replace("’","'")
|
||||
strItem=strItem.replace("‘","‘")
|
||||
strItem=strItem.replace("’","’")
|
||||
strItem=strItem.replace("—","-")
|
||||
strItem=strItem.replace("'","'")
|
||||
strItem=strItem.replace("???","'")
|
||||
strItem=strItem.replace("&quot;","\"")
|
||||
return strItem
|
||||
|
||||
def pad(str,filler,length):
|
||||
|
||||
37
utility.py
37
utility.py
@@ -36,29 +36,34 @@ class StringHelper:
|
||||
def betweenString(strItem, strBegin, strEnd):
|
||||
if strItem is None:
|
||||
return None
|
||||
index=-1
|
||||
index = -1
|
||||
if strBegin is None:
|
||||
index=0
|
||||
index = 0
|
||||
else:
|
||||
index = strItem.index(strBegin)
|
||||
if -1==index:
|
||||
try:
|
||||
if strBegin.startswith("<") and strBegin.endswith(">"):
|
||||
tag_name = strBegin[1:-1] # e.g. "time"
|
||||
index = strItem.index("<" + tag_name)
|
||||
index = strItem.index(">", index) + 1
|
||||
else:
|
||||
index = strItem.index(strBegin) + len(strBegin)
|
||||
except ValueError:
|
||||
return None
|
||||
str=None
|
||||
if strBegin is not None:
|
||||
str=strItem[index+len(strBegin):]
|
||||
else:
|
||||
str=strItem
|
||||
if index == -1:
|
||||
return None
|
||||
str = strItem[index:] if strBegin is not None else strItem
|
||||
if strEnd is None:
|
||||
return str
|
||||
index=str.index(strEnd)
|
||||
if -1==index :
|
||||
try:
|
||||
index = str.index(strEnd)
|
||||
except ValueError:
|
||||
return None
|
||||
sb=""
|
||||
for strIndex in range(0, len(str)-1):
|
||||
if index==strIndex:
|
||||
sb = ""
|
||||
for strIndex in range(0, len(str) - 1):
|
||||
if index == strIndex:
|
||||
break
|
||||
sb=sb+str[strIndex]
|
||||
return (sb)
|
||||
sb = sb + str[strIndex]
|
||||
return sb
|
||||
|
||||
class HttpNetRequest:
|
||||
def __init__(self):
|
||||
|
||||
7
video.py
7
video.py
@@ -68,7 +68,12 @@ class Video:
|
||||
description=splits[0].strip()
|
||||
url=splits[1].strip()
|
||||
icon=splits[2].strip()
|
||||
timestamp=DateTime(splits[3].strip())
|
||||
datePart = splits[3].strip()
|
||||
timestamp = DateTime.getCurrentTime()
|
||||
try :
|
||||
timestamp=DateTime(datePart)
|
||||
except Exception as exception:
|
||||
print(f"Encountered invalid date '{datePart}'")
|
||||
return(Video(description,url,icon,timestamp))
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user