Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4cb76dfb58 | |||
| e660e385e5 |
397
newsfeed.py
397
newsfeed.py
@@ -30,93 +30,105 @@ class NewsFeed:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def getItemsInAmericasNewsRoomFeed(self,url):
|
def getItemsInAmericasNewsRoomFeed(self,url):
|
||||||
now=datetime.now()
|
response = None
|
||||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb)
|
try:
|
||||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
now=datetime.now()
|
||||||
videos=self.readFeedCache(cachePathFileName)
|
cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb)
|
||||||
if videos is not None:
|
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||||
return(videos)
|
videos=self.readFeedCache(cachePathFileName)
|
||||||
sections=Sections()
|
if videos is not None:
|
||||||
videos = {}
|
return(videos)
|
||||||
httpNetRequest=HttpNetRequest()
|
sections=Sections()
|
||||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
videos = {}
|
||||||
status=response.status_code
|
httpNetRequest=HttpNetRequest()
|
||||||
searchIndex=0
|
response=httpNetRequest.getHttpNetRequest(url)
|
||||||
response.close()
|
status=response.status_code
|
||||||
if status!=200:
|
searchIndex=0
|
||||||
return None
|
if status!=200:
|
||||||
if LOG_HTTP_RESPONSES:
|
return None
|
||||||
self.writeLog(url)
|
if LOG_HTTP_RESPONSES:
|
||||||
self.writeLog(response.text)
|
self.writeLog(url)
|
||||||
while -1!= searchIndex:
|
self.writeLog(response.text)
|
||||||
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
|
while -1!= searchIndex:
|
||||||
if video is not None and not (video.description in videos):
|
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
|
||||||
videos[video.description]=video
|
if video is not None and not (video.description in videos):
|
||||||
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
videos[video.description]=video
|
||||||
videoList=list(videos.values())
|
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
||||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False)
|
videoList=list(videos.values())
|
||||||
self.writeFeedCache(cachePathFileName,videoList)
|
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False)
|
||||||
return (videoList)
|
self.writeFeedCache(cachePathFileName,videoList)
|
||||||
|
return (videoList)
|
||||||
|
finally:
|
||||||
|
if None!= response:
|
||||||
|
response.close()
|
||||||
|
|
||||||
def getItemsInOutnumberedFeed(self,url):
|
def getItemsInOutnumberedFeed(self,url):
|
||||||
now=datetime.now()
|
response = None
|
||||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb)
|
try:
|
||||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
now=datetime.now()
|
||||||
videos=self.readFeedCache(cachePathFileName)
|
cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb)
|
||||||
if videos is not None:
|
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||||
return(videos)
|
videos=self.readFeedCache(cachePathFileName)
|
||||||
sections=Sections()
|
if videos is not None:
|
||||||
videos = {}
|
return(videos)
|
||||||
httpNetRequest=HttpNetRequest()
|
sections=Sections()
|
||||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
videos = {}
|
||||||
status=response.status_code
|
httpNetRequest=HttpNetRequest()
|
||||||
searchIndex=0
|
response=httpNetRequest.getHttpNetRequest(url)
|
||||||
response.close()
|
status=response.status_code
|
||||||
if status!=200:
|
searchIndex=0
|
||||||
return None
|
if status!=200:
|
||||||
if LOG_HTTP_RESPONSES:
|
return None
|
||||||
self.writeLog(url)
|
if LOG_HTTP_RESPONSES:
|
||||||
self.writeLog(response.text)
|
self.writeLog(url)
|
||||||
while -1!= searchIndex:
|
self.writeLog(response.text)
|
||||||
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
|
while -1!= searchIndex:
|
||||||
if video is not None and not (video.description in videos):
|
video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex)
|
||||||
videos[video.description]=video
|
if video is not None and not (video.description in videos):
|
||||||
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
videos[video.description]=video
|
||||||
videoList=list(videos.values())
|
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
||||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
videoList=list(videos.values())
|
||||||
self.writeFeedCache(cachePathFileName,videoList)
|
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||||
return (videoList)
|
self.writeFeedCache(cachePathFileName,videoList)
|
||||||
|
return (videoList)
|
||||||
|
finally:
|
||||||
|
if None!=response:
|
||||||
|
response.close()
|
||||||
|
|
||||||
def getItemsInFeed(self,url):
|
def getItemsInFeed(self,url):
|
||||||
now=datetime.now()
|
response = None
|
||||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb)
|
try:
|
||||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
now=datetime.now()
|
||||||
self.writeLog(f"Loading videos from cache {cachePathFileName}")
|
cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb)
|
||||||
videos=self.readFeedCache(cachePathFileName)
|
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||||
if videos is not None:
|
self.writeLog(f"Loading videos from cache {cachePathFileName}")
|
||||||
return(videos)
|
videos=self.readFeedCache(cachePathFileName)
|
||||||
sections=Sections()
|
if videos is not None:
|
||||||
videos = {}
|
return(videos)
|
||||||
httpNetRequest=HttpNetRequest()
|
sections=Sections()
|
||||||
self.writeLog(f"Loading videos from {url}")
|
videos = {}
|
||||||
response=httpNetRequest=httpNetRequest.getHttpNetRequest(url)
|
httpNetRequest=HttpNetRequest()
|
||||||
status=response.status_code
|
self.writeLog(f"Loading videos from {url}")
|
||||||
searchIndex=0
|
response=httpNetRequest.getHttpNetRequest(url)
|
||||||
response.close()
|
status=response.status_code
|
||||||
if status!=200:
|
searchIndex=0
|
||||||
return None
|
if status!=200:
|
||||||
if LOG_HTTP_RESPONSES:
|
return None
|
||||||
self.writeLog(url)
|
if LOG_HTTP_RESPONSES:
|
||||||
self.writeLog(response.text)
|
self.writeLog(url)
|
||||||
while -1!= searchIndex:
|
self.writeLog(response.text)
|
||||||
video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex)
|
while -1!= searchIndex:
|
||||||
if video is not None and not (video.description in videos):
|
video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex)
|
||||||
videos[video.description]=video
|
if video is not None and not (video.description in videos):
|
||||||
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
videos[video.description]=video
|
||||||
videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS)
|
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
||||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS)
|
||||||
self.writeFeedCache(cachePathFileName,videoList)
|
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||||
return (videoList)
|
self.writeFeedCache(cachePathFileName,videoList)
|
||||||
|
return (videoList)
|
||||||
|
finally:
|
||||||
|
if None!=response:
|
||||||
|
response.close()
|
||||||
|
|
||||||
def filterFeedMaxDays(self, videos, days):
|
def filterFeedMaxDays(self, videos, days):
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
@@ -126,89 +138,97 @@ class NewsFeed:
|
|||||||
if delta.days <= days:
|
if delta.days <= days:
|
||||||
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||||
self.writeLog(message)
|
self.writeLog(message)
|
||||||
filteredList.insert(0,video)
|
filteredList.append(video)
|
||||||
else:
|
else:
|
||||||
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
|
||||||
self.writeLog(message)
|
self.writeLog(message)
|
||||||
return filteredList
|
return filteredList
|
||||||
|
|
||||||
def getUSItemsInFeed(self,url):
|
def getUSItemsInFeed(self,url):
|
||||||
now=datetime.now()
|
response = None
|
||||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb)
|
try:
|
||||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
now=datetime.now()
|
||||||
videos=self.readFeedCache(cachePathFileName)
|
cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb)
|
||||||
if videos is not None:
|
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||||
return(videos)
|
videos=self.readFeedCache(cachePathFileName)
|
||||||
sections=Sections()
|
if videos is not None:
|
||||||
videos = {}
|
return(videos)
|
||||||
httpNetRequest=HttpNetRequest()
|
sections=Sections()
|
||||||
response=httpNetRequest.getHttpNetRequest(url)
|
videos = {}
|
||||||
status=response.status_code
|
|
||||||
searchIndex=0
|
|
||||||
response.close()
|
|
||||||
if status!=200:
|
|
||||||
return None
|
|
||||||
if LOG_HTTP_RESPONSES:
|
|
||||||
self.writeLog(url)
|
|
||||||
self.writeLog(response.text)
|
|
||||||
while -1!= searchIndex:
|
|
||||||
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
|
||||||
if videoId is None:
|
|
||||||
continue
|
|
||||||
url='https://video.foxnews.com/v/'+videoId
|
|
||||||
httpNetRequest=HttpNetRequest()
|
httpNetRequest=HttpNetRequest()
|
||||||
innerResponse=httpNetRequest.getHttpNetRequest(url)
|
response=httpNetRequest.getHttpNetRequest(url)
|
||||||
status=innerResponse.status_code
|
status=response.status_code
|
||||||
innerResponse.close()
|
searchIndex=0
|
||||||
if status!=200:
|
if status!=200:
|
||||||
continue
|
return None
|
||||||
video=sections.getVideoContentInSection(innerResponse.text)
|
if LOG_HTTP_RESPONSES:
|
||||||
if video is not None and not (video.description in videos):
|
self.writeLog(url)
|
||||||
videos[video.description]=video
|
self.writeLog(response.text)
|
||||||
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
while -1!= searchIndex:
|
||||||
videoList=list(videos.values())
|
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
||||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
if videoId is None:
|
||||||
self.writeFeedCache(cachePathFileName,videoList)
|
continue
|
||||||
return (videoList)
|
videoUrl='https://video.foxnews.com/v/'+videoId
|
||||||
|
httpNetRequest=HttpNetRequest()
|
||||||
|
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
|
||||||
|
status=innerResponse.status_code
|
||||||
|
innerResponse.close()
|
||||||
|
if status!=200:
|
||||||
|
continue
|
||||||
|
video=sections.getVideoContentInSection(innerResponse.text)
|
||||||
|
if video is not None and not (video.description in videos):
|
||||||
|
videos[video.description]=video
|
||||||
|
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
||||||
|
videoList=list(videos.values())
|
||||||
|
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||||
|
self.writeFeedCache(cachePathFileName,videoList)
|
||||||
|
return (videoList)
|
||||||
|
finally:
|
||||||
|
if None!=response:
|
||||||
|
response.close()
|
||||||
|
|
||||||
def getExclusiveItemsInFeed(self,url):
|
def getExclusiveItemsInFeed(self,url):
|
||||||
now=datetime.now()
|
response = None
|
||||||
cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb)
|
try:
|
||||||
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
now=datetime.now()
|
||||||
videos=self.readFeedCache(cachePathFileName)
|
cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb)
|
||||||
if videos is not None:
|
if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS):
|
||||||
return(videos)
|
videos=self.readFeedCache(cachePathFileName)
|
||||||
sections=Sections()
|
if videos is not None:
|
||||||
videos = {}
|
return(videos)
|
||||||
httpNetRequest=HttpNetRequest()
|
sections=Sections()
|
||||||
response=httpNetRequest.getHttpNetRequest(url)
|
videos = {}
|
||||||
status=response.status_code
|
|
||||||
searchIndex=0
|
|
||||||
response.close()
|
|
||||||
if status!=200:
|
|
||||||
return None
|
|
||||||
if LOG_HTTP_RESPONSES:
|
|
||||||
self.writeLog(url)
|
|
||||||
self.writeLog(response.Text)
|
|
||||||
while -1!= searchIndex:
|
|
||||||
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
|
||||||
if videoId is None:
|
|
||||||
continue
|
|
||||||
url='https://video.foxnews.com/v/'+videoId
|
|
||||||
httpNetRequest=HttpNetRequest()
|
httpNetRequest=HttpNetRequest()
|
||||||
innerResponse=httpNetRequest.getHttpNetRequest(url)
|
response=httpNetRequest.getHttpNetRequest(url)
|
||||||
status=innerResponse.status_code
|
status=response.status_code
|
||||||
innerResponse.close()
|
searchIndex=0
|
||||||
if status!=200:
|
if status!=200:
|
||||||
continue
|
return None
|
||||||
video=sections.getVideoContentInSection(innerResponse.text)
|
if LOG_HTTP_RESPONSES:
|
||||||
if video is not None and not (video.description in videos):
|
self.writeLog(url)
|
||||||
videos[video.description]=video
|
self.writeLog(response.text)
|
||||||
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
while -1!= searchIndex:
|
||||||
videoList=list(videos.values())
|
videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex)
|
||||||
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
if videoId is None:
|
||||||
self.writeFeedCache(cachePathFileName,videoList)
|
continue
|
||||||
return (videoList)
|
videoUrl='https://video.foxnews.com/v/'+videoId
|
||||||
|
httpNetRequest=HttpNetRequest()
|
||||||
|
innerResponse=httpNetRequest.getHttpNetRequest(videoUrl)
|
||||||
|
status=innerResponse.status_code
|
||||||
|
innerResponse.close()
|
||||||
|
if status!=200:
|
||||||
|
continue
|
||||||
|
video=sections.getVideoContentInSection(innerResponse.text)
|
||||||
|
if video is not None and not (video.description in videos):
|
||||||
|
videos[video.description]=video
|
||||||
|
video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset))
|
||||||
|
videoList=list(videos.values())
|
||||||
|
videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True)
|
||||||
|
self.writeFeedCache(cachePathFileName,videoList)
|
||||||
|
return (videoList)
|
||||||
|
finally:
|
||||||
|
if None!=response:
|
||||||
|
response.close()
|
||||||
|
|
||||||
def getItemsInArchiveFeed(self,url,archiveDbFileName):
|
def getItemsInArchiveFeed(self,url,archiveDbFileName):
|
||||||
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
|
cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb)
|
||||||
@@ -220,11 +240,11 @@ class NewsFeed:
|
|||||||
def readFeedCache(self,pathFileName):
|
def readFeedCache(self,pathFileName):
|
||||||
try:
|
try:
|
||||||
videos=[]
|
videos=[]
|
||||||
|
# 'with' will automatically close the stream
|
||||||
with open(pathFileName,"r",encoding='utf-8') as inputStream:
|
with open(pathFileName,"r",encoding='utf-8') as inputStream:
|
||||||
for line in inputStream:
|
for line in inputStream:
|
||||||
video=Video.fromString(line)
|
video=Video.fromString(line)
|
||||||
videos.append(video)
|
videos.append(video)
|
||||||
inputStream.close()
|
|
||||||
return(videos)
|
return(videos)
|
||||||
except:
|
except:
|
||||||
self.writeLog(traceback.format_exc())
|
self.writeLog(traceback.format_exc())
|
||||||
@@ -235,34 +255,35 @@ class NewsFeed:
|
|||||||
with open(pathFileName,"w",encoding='utf-8') as outputStream:
|
with open(pathFileName,"w",encoding='utf-8') as outputStream:
|
||||||
for video in videos:
|
for video in videos:
|
||||||
outputStream.write(video.toString()+"\n")
|
outputStream.write(video.toString()+"\n")
|
||||||
outputStream.close()
|
# 'with' will automatically close the stream
|
||||||
return(videos)
|
return(videos)
|
||||||
except:
|
except:
|
||||||
self.writeLog(traceback.format_exc())
|
self.writeLog(traceback.format_exc())
|
||||||
return(videos)
|
return(videos)
|
||||||
|
|
||||||
def isFeedCacheAvailable(self,pathFileName,expireMinutes):
|
def isFeedCacheAvailable(self, pathFileName, expireMinutes):
|
||||||
try:
|
try:
|
||||||
self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
|
self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
|
||||||
if not os.path.isfile(pathFileName):
|
if not os.path.isfile(pathFileName):
|
||||||
return(False)
|
return False
|
||||||
modifiedTime=os.path.getmtime(pathFileName)
|
modifiedTime = os.path.getmtime(pathFileName)
|
||||||
convertTime=time.localtime(modifiedTime)
|
convertTime = time.localtime(modifiedTime)
|
||||||
formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
|
formatTime = time.strftime('%d%m%Y %H:%M:%S', convertTime)
|
||||||
fileDateTime=time.strptime(formatTime,'%d%m%Y %H:%M:%S')
|
fileDateTime = time.strptime(formatTime, '%d%m%Y %H:%M:%S')
|
||||||
currentTime=datetime.now()
|
currentTime = datetime.now()
|
||||||
timedelta=currentTime-datetime(*(fileDateTime[0:6]))
|
elapsed = currentTime - datetime(*(fileDateTime[0:6]))
|
||||||
hours, hremainder = divmod(timedelta.seconds,3600)
|
totalSeconds = int(elapsed.total_seconds())
|
||||||
minutes, mremainder = divmod(timedelta.seconds,60)
|
hours, remainder = divmod(totalSeconds, 3600)
|
||||||
self.writeLog('file is = "{age}" hours old'.format(age=hours))
|
minutes, _ = divmod(remainder, 60)
|
||||||
self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
|
self.writeLog('file is = "{age}" hours old'.format(age=hours))
|
||||||
if hours > 1 or minutes > expireMinutes:
|
self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
|
||||||
self.archiveFile(pathFileName)
|
if hours > 1 or minutes > expireMinutes:
|
||||||
return(False)
|
self.archiveFile(pathFileName)
|
||||||
return (True)
|
return False
|
||||||
|
return True
|
||||||
except:
|
except:
|
||||||
self.writeLog(traceback.format_exc());
|
self.writeLog(traceback.format_exc())
|
||||||
return(False)
|
return False
|
||||||
|
|
||||||
def archiveFile(self, pathFileName):
|
def archiveFile(self, pathFileName):
|
||||||
if not os.path.isfile(pathFileName):
|
if not os.path.isfile(pathFileName):
|
||||||
@@ -316,25 +337,35 @@ class Sections:
|
|||||||
if "tokenvod" in previewUrl:
|
if "tokenvod" in previewUrl:
|
||||||
return video, searchIndex
|
return video, searchIndex
|
||||||
|
|
||||||
indexDescription=strContainingString.index("alt=\"")
|
# Handle video description
|
||||||
|
indexDescription=strContainingString.find("alt=\"")
|
||||||
|
if -1 == indexDescription:
|
||||||
|
return video, searchIndex
|
||||||
description=strContainingString[indexDescription:]
|
description=strContainingString[indexDescription:]
|
||||||
description=self.betweenString(description,'"','"')
|
description=self.betweenString(description,'"','"')
|
||||||
description=self.removeHtml(description)
|
description=self.removeHtml(description)
|
||||||
description=description.replace("- Fox News","")
|
description=description.replace("- Fox News","")
|
||||||
if "vod.foxbusiness" in description:
|
if "vod.foxbusiness" in description:
|
||||||
return video, searchIndex
|
return video, searchIndex
|
||||||
indexDuration=strContainingString.index("<div class=\"duration\">")
|
|
||||||
|
# Handle video duration
|
||||||
|
indexDuration=strContainingString.find("<div class=\"duration\">")
|
||||||
if -1 != indexDuration:
|
if -1 != indexDuration:
|
||||||
strDuration=strContainingString[indexDuration:]
|
strDuration=strContainingString[indexDuration:]
|
||||||
strDuration=self.betweenString(strDuration,">","<")
|
strDuration=self.betweenString(strDuration,">","<")
|
||||||
description=description+" - "+strDuration
|
description=description+" - "+strDuration
|
||||||
indexPublication=strContainingString.index("<div class=\"pub-date\">")
|
|
||||||
|
# Handle video publication
|
||||||
|
strPublication = ""
|
||||||
|
indexPublication=strContainingString.find("<div class=\"pub-date\">")
|
||||||
if -1 != indexPublication:
|
if -1 != indexPublication:
|
||||||
strPublication=strContainingString[indexPublication:]
|
strPublication=strContainingString[indexPublication:]
|
||||||
strPublication=self.betweenString(strPublication,"<time>","</time>")
|
strPublication=self.betweenString(strPublication,"<time>","</time>")
|
||||||
description=description+" ("+strPublication+")"
|
description=description+" ("+strPublication+")"
|
||||||
|
|
||||||
|
# Handle the icon
|
||||||
icon=None
|
icon=None
|
||||||
indexIcon=strContainingString.index("srcset=")
|
indexIcon=strContainingString.find("srcset=")
|
||||||
if -1 != indexIcon:
|
if -1 != indexIcon:
|
||||||
icon=strContainingString[indexIcon:]
|
icon=strContainingString[indexIcon:]
|
||||||
icon=self.betweenString(icon,"\"","\"")
|
icon=self.betweenString(icon,"\"","\"")
|
||||||
@@ -421,10 +452,12 @@ class Sections:
|
|||||||
for code in codes:
|
for code in codes:
|
||||||
strItem=strItem.replace(code,"'")
|
strItem=strItem.replace(code,"'")
|
||||||
strItem=strItem.replace("&","&")
|
strItem=strItem.replace("&","&")
|
||||||
strItem=strItem.replace("‘","'")
|
strItem=strItem.replace("‘","‘")
|
||||||
strItem=strItem.replace("’","'")
|
strItem=strItem.replace("’","’")
|
||||||
strItem=strItem.replace("—","-")
|
strItem=strItem.replace("—","-")
|
||||||
|
strItem=strItem.replace("'","'")
|
||||||
strItem=strItem.replace("???","'")
|
strItem=strItem.replace("???","'")
|
||||||
|
strItem=strItem.replace(""","\"")
|
||||||
return strItem
|
return strItem
|
||||||
|
|
||||||
def pad(str,filler,length):
|
def pad(str,filler,length):
|
||||||
|
|||||||
37
utility.py
37
utility.py
@@ -36,29 +36,34 @@ class StringHelper:
|
|||||||
def betweenString(strItem, strBegin, strEnd):
|
def betweenString(strItem, strBegin, strEnd):
|
||||||
if strItem is None:
|
if strItem is None:
|
||||||
return None
|
return None
|
||||||
index=-1
|
index = -1
|
||||||
if strBegin is None:
|
if strBegin is None:
|
||||||
index=0
|
index = 0
|
||||||
else:
|
else:
|
||||||
index = strItem.index(strBegin)
|
try:
|
||||||
if -1==index:
|
if strBegin.startswith("<") and strBegin.endswith(">"):
|
||||||
|
tag_name = strBegin[1:-1] # e.g. "time"
|
||||||
|
index = strItem.index("<" + tag_name)
|
||||||
|
index = strItem.index(">", index) + 1
|
||||||
|
else:
|
||||||
|
index = strItem.index(strBegin) + len(strBegin)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if index == -1:
|
||||||
return None
|
return None
|
||||||
str=None
|
str = strItem[index:] if strBegin is not None else strItem
|
||||||
if strBegin is not None:
|
|
||||||
str=strItem[index+len(strBegin):]
|
|
||||||
else:
|
|
||||||
str=strItem
|
|
||||||
if strEnd is None:
|
if strEnd is None:
|
||||||
return str
|
return str
|
||||||
index=str.index(strEnd)
|
try:
|
||||||
if -1==index :
|
index = str.index(strEnd)
|
||||||
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
sb=""
|
sb = ""
|
||||||
for strIndex in range(0, len(str)-1):
|
for strIndex in range(0, len(str) - 1):
|
||||||
if index==strIndex:
|
if index == strIndex:
|
||||||
break
|
break
|
||||||
sb=sb+str[strIndex]
|
sb = sb + str[strIndex]
|
||||||
return (sb)
|
return sb
|
||||||
|
|
||||||
class HttpNetRequest:
|
class HttpNetRequest:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user