# Newsfeed/newsfeed.py

import json
import os
import webbrowser
import requests
import traceback
import time
import re
import glob
import shutil
from datetime import timedelta
from datetime import datetime
from datetime import timezone
from environment import *
from utility import *
from video import *

class NewsFeed:
    def __init__(self, pathDb, logger=None):
        """Store the cache location and the optional logger.

        pathDb -- location handed to PathHelper.makePathFileName when cache file names are built.
        logger -- optional object with a write(message) method; when None, writeLog prints.
        """
        self.pathDb, self.logger = pathDb, logger

    @staticmethod
    def isResourceAvailable(url):
        """Return True when a HEAD request to url completes with an OK status.

        Any failure to perform the request (bad URL, timeout, connection error, ...) is
        reported as False rather than raised.
        """
        try:
            response = requests.head(url, timeout=2.5)
            try:
                return bool(response.ok)
            finally:
                # Hand the connection back instead of waiting for garbage collection.
                response.close()
        except Exception:
            # Best-effort probe: "could not ask" means "not available". Catching Exception
            # instead of using a bare except lets KeyboardInterrupt/SystemExit propagate.
            return False

    def getItemsInAmericasNewsRoomFeed(self,url):
        """Return the videos for the feed cached under VIDEODB_AMERICAS_NEWSROOM_FILENAME.

        If isFeedCacheAvailable accepts the cache file and it can be read, the cached list is
        returned. Otherwise url is downloaded, every "article" section is parsed into a Video,
        videos whose description was already seen are dropped, each video gets a feed time
        derived from its feedTimeOffset, and the list is written back to the cache.

        url -- page to download.
        Returns a list of videos sorted ascending by getFeedTime(), or None when the HTTP
        status is not 200.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached

        sections = Sections()
        response = HttpNetRequest().getHttpNetRequest(url)
        status = response.status_code
        response.close()
        if status != 200:
            return None

        # Read the body once instead of on every loop pass.
        # NOTE(review): the body is read after close(), as before — confirm the response
        # object keeps its content available once closed.
        html = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(html)

        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(html, "article", searchIndex)
            if video is None or video.description in videos:
                continue
            videos[video.description] = video
            video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda item: item.getFeedTime(), reverse=False)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def getItemsInOutnumberedFeed(self,url):
        """Return the videos for the feed cached under VIDEODB_OUTNUMBERED_FILENAME.

        If isFeedCacheAvailable accepts the cache file and it can be read, the cached list is
        returned. Otherwise url is downloaded, every "article" section is parsed into a Video,
        videos whose description was already seen are dropped, each video gets a feed time
        derived from its feedTimeOffset, and the list is written back to the cache.

        url -- page to download.
        Returns a list of videos sorted descending by getFeedTime(), or None when the HTTP
        status is not 200.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached

        sections = Sections()
        response = HttpNetRequest().getHttpNetRequest(url)
        status = response.status_code
        response.close()
        if status != 200:
            return None

        # Read the body once instead of on every loop pass.
        # NOTE(review): the body is read after close(), as before — confirm the response
        # object keeps its content available once closed.
        html = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(html)

        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(html, "article", searchIndex)
            if video is None or video.description in videos:
                continue
            videos[video.description] = video
            video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda item: item.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def getItemsInFeed(self,url):
        """Return the videos for the main feed cached under VIDEODB_FILENAME.

        If isFeedCacheAvailable accepts the cache file and it can be read, the cached list is
        returned. Otherwise url is downloaded, every "article" section is parsed into a Video,
        videos whose description was already seen are dropped, each video gets a feed time
        derived from its feedTimeOffset, videos older than FEED_REJECT_IF_OLDER_THAN_DAYS are
        filtered out, and the list is written back to the cache.

        url -- page to download.
        Returns a list of videos sorted descending by getFeedTime(), or None when the HTTP
        status is not 200.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            self.writeLog(f"Loading videos from cache {cachePathFileName}")
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached

        sections = Sections()
        httpNetRequest = HttpNetRequest()
        self.writeLog(f"Loading videos from {url}")
        response = httpNetRequest.getHttpNetRequest(url)
        status = response.status_code
        response.close()
        if status != 200:
            return None

        # Read the body once instead of on every loop pass.
        # NOTE(review): the body is read after close(), as before — confirm the response
        # object keeps its content available once closed.
        html = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(html)

        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(html, "article", searchIndex)
            if video is None or video.description in videos:
                continue
            videos[video.description] = video
            video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        recentVideos = self.filterFeedMaxDays(list(videos.values()), FEED_REJECT_IF_OLDER_THAN_DAYS)
        videoList = sorted(recentVideos, key=lambda item: item.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def filterFeedMaxDays(self, videos, days):
       now = datetime.now()
       filteredList=[]
       for video in videos:
          delta = now - video.getFeedTime()
          if delta.days <= days:
             message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}',  description={video.description}"
             self.writeLog(message)
             filteredList.insert(0,video)
          else:
             message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}',  description={video.description}"
             self.writeLog(message)
       return filteredList

    def getUSItemsInFeed(self,url):
        """Return the videos for the feed cached under VIDEODB_US_FILENAME.

        If isFeedCacheAvailable accepts the cache file and it can be read, the cached list is
        returned. Otherwise url is downloaded and a video id is pulled from each "article"
        section; for every id the page https://video.foxnews.com/v/<id> is fetched and parsed
        into a Video. Videos whose description was already seen are dropped, and the result is
        written back to the cache.

        url -- listing page to download.
        Returns a list of videos sorted descending by getFeedTime(), or None when the listing
        page's HTTP status is not 200. Video pages answering with a non-200 status are skipped.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_US_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached

        sections = Sections()
        response = HttpNetRequest().getHttpNetRequest(url)
        status = response.status_code
        response.close()
        if status != 200:
            return None

        # Read the listing body once instead of on every loop pass.
        # NOTE(review): the body is read after close(), as before — confirm the response
        # object keeps its content available once closed.
        html = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(html)

        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(html, "article", searchIndex)
            if videoId is None:
                continue
            # Separate names so the loop no longer overwrites the url parameter or the
            # listing page's status.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
            innerStatus = innerResponse.status_code
            innerResponse.close()
            if innerStatus != 200:
                continue
            video = sections.getVideoContentInSection(innerResponse.text)
            if video is None or video.description in videos:
                continue
            videos[video.description] = video
            # NOTE(review): getVideoContentInSection never assigns feedTimeOffset, so this
            # uses whatever value Video itself provides — confirm that is intended.
            video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda item: item.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def getExclusiveItemsInFeed(self,url):
        """Return the videos for the feed cached under VIDEODB_EXCLUSIVE_FILENAME.

        If isFeedCacheAvailable accepts the cache file and it can be read, the cached list is
        returned. Otherwise url is downloaded and a video id is pulled from each "article"
        section; for every id the page https://video.foxnews.com/v/<id> is fetched and parsed
        into a Video. Videos whose description was already seen are dropped, and the result is
        written back to the cache.

        url -- listing page to download.
        Returns a list of videos sorted descending by getFeedTime(), or None when the listing
        page's HTTP status is not 200. Video pages answering with a non-200 status are skipped.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME, self.pathDb)
        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached

        sections = Sections()
        response = HttpNetRequest().getHttpNetRequest(url)
        status = response.status_code
        response.close()
        if status != 200:
            return None

        # Read the listing body once instead of on every loop pass.
        # NOTE(review): the body is read after close(), as before — confirm the response
        # object keeps its content available once closed.
        html = response.text
        if LOG_HTTP_RESPONSES:
            self.writeLog(url)
            # This previously logged response.Text (capital T), an attribute the rest of this
            # class never uses; every other feed method logs response.text.
            self.writeLog(html)

        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(html, "article", searchIndex)
            if videoId is None:
                continue
            # Separate names so the loop no longer overwrites the url parameter or the
            # listing page's status.
            videoUrl = 'https://video.foxnews.com/v/' + videoId
            innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
            innerStatus = innerResponse.status_code
            innerResponse.close()
            if innerStatus != 200:
                continue
            video = sections.getVideoContentInSection(innerResponse.text)
            if video is None or video.description in videos:
                continue
            videos[video.description] = video
            # NOTE(review): getVideoContentInSection never assigns feedTimeOffset, so this
            # uses whatever value Video itself provides — confirm that is intended.
            video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

        videoList = sorted(videos.values(), key=lambda item: item.getFeedTime(), reverse=True)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def getItemsInArchiveFeed(self,url,archiveDbFileName):
        """Load an archived feed straight from its cache file.

        url               -- accepted for signature compatibility; not used here.
        archiveDbFileName -- file name combined with self.pathDb to locate the archive.
        Returns whatever readFeedCache yields: the list of videos, or None on failure.
        """
        archivePathFileName = PathHelper.makePathFileName(archiveDbFileName, self.pathDb)
        # readFeedCache already answers None when it cannot read, so pass its result through.
        return self.readFeedCache(archivePathFileName)

    def readFeedCache(self,pathFileName):
        try:
          videos=[]
          with open(pathFileName,"r",encoding='utf-8') as inputStream:
            for line in inputStream:
                video=Video.fromString(line)
                videos.append(video)
          inputStream.close()
          return(videos)
        except:
          self.writeLog(traceback.format_exc())
          return(None)

    def writeFeedCache(self,pathFileName,videos):
        try:
          with open(pathFileName,"w",encoding='utf-8') as outputStream:
            for video in videos:
                outputStream.write(video.toString()+"\n")
          outputStream.close()
          return(videos)
        except:
          self.writeLog(traceback.format_exc())
          return(videos)

    def isFeedCacheAvailable(self,pathFileName,expireMinutes):
        try:
          self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
          if not os.path.isfile(pathFileName):
            return(False)
          modifiedTime=os.path.getmtime(pathFileName)
          convertTime=time.localtime(modifiedTime)
          formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
          fileDateTime=DateTimeHelper.strptime(formatTime,'%d%m%Y %H:%M:%S')
          currentTime=datetime.now()
          timedelta=currentTime-fileDateTime
          hours, hremainder = divmod(timedelta.seconds,3600)
          minutes, mremainder = divmod(timedelta.seconds,60)
          self.writeLog('file is  = "{age}" hours old'.format(age=hours))
          self.writeLog('file is  = "{age}" minutes old'.format(age=minutes))
          if hours > 1 or minutes > expireMinutes:
            self.archiveFile(pathFileName)
            return(False)
          return (True)
        except:
          self.writeLog(traceback.format_exc());
          return(False)

    def archiveFile(self, pathFileName):
        if not os.path.isfile(pathFileName):
          return(False)
        archiveFile=StringHelper.betweenString(pathFileName, None, '.txt')
        archiveFileLike=archiveFile+'.txt.*'
        files = glob.glob(archiveFileLike)
        index=len(files)+1
        archiveFileName=archiveFile+'.txt.'+str(index)
        print('archiveFile:  Copying "{pathFileName}" to "{archiveFileName}".'.format(pathFileName=pathFileName,archiveFileName=archiveFileName))
        shutil.copy(pathFileName,archiveFileName)
        os.remove(pathFileName)
        return(True)

    def writeLog(self,message):
        """Send message to the configured logger, or print it when there is none."""
        if self.logger is None:
            print(message)
            return
        self.logger.write(message)

class Sections:
    def __init__(self):
        """Create a parser; the only attribute set is the placeholder `dummy`."""
        self.dummy = None

    def getItemsInSection(self, strInput, sectionName, searchIndex):
        """Parse the next <sectionName ...>...</sectionName section into a Video.

        strInput    -- markup to scan.
        sectionName -- tag name of the section (callers in this file pass "article").
        searchIndex -- offset in strInput at which to start looking.

        Returns (video, nextSearchIndex). nextSearchIndex is -1 when no further complete
        section exists. video is None when there is no section, or when the section is
        skipped: it has no preview="..." or alt="..." attribute, its preview URL contains
        "tokenvod", or its description contains "vod.foxbusiness".

        The description is the alt text with HTML entities cleaned and "- Fox News" removed,
        followed by " - <duration>" and " (<publication>)" when those elements are present.
        """
        startSection = '<' + sectionName
        endSection = '</' + sectionName

        startIndex = strInput.find(startSection, searchIndex)
        if startIndex == -1:
            return None, -1
        endIndex = strInput.find(endSection, startIndex)
        if endIndex == -1:
            return None, -1

        searchIndex = endIndex + len(endSection)
        strContainingString = strInput[startIndex:endIndex + 1 + len(endSection)]

        indexPreview = strContainingString.find("preview=\"")
        if indexPreview == -1:
            return None, searchIndex
        previewUrl = self.betweenString(strContainingString[indexPreview:], '"', '"')
        if "tokenvod" in previewUrl:
            return None, searchIndex

        # find() instead of index(): index() raises ValueError on a miss, so the "-1" guards
        # below could never trigger and one incomplete article aborted the whole feed.
        indexDescription = strContainingString.find("alt=\"")
        if indexDescription == -1:
            # Without a title there is nothing to list or de-duplicate on; skip the article.
            return None, searchIndex
        description = self.betweenString(strContainingString[indexDescription:], '"', '"')
        description = self.removeHtml(description)
        description = description.replace("- Fox News", "")
        if "vod.foxbusiness" in description:
            return None, searchIndex

        indexDuration = strContainingString.find("<div class=\"duration\">")
        if indexDuration != -1:
            strDuration = self.betweenString(strContainingString[indexDuration:], ">", "<")
            description = description + " - " + strDuration

        strPublication = None
        indexPublication = strContainingString.find("<div class=\"pub-date\">")
        if indexPublication != -1:
            strPublication = self.betweenString(strContainingString[indexPublication:], "<time>", "</time>")
            description = description + " (" + strPublication + ")"

        icon = None
        indexIcon = strContainingString.find("srcset=")
        if indexIcon != -1:
            icon = self.betweenString(strContainingString[indexIcon:], "\"", "\"")
            # The last srcset candidate is used, with its query string cut off.
            icon = self.betweenString(icon.split(',')[-1], None, '?')
            icon = icon.strip()

        video = Video(description.strip(), previewUrl, icon)
        if strPublication is not None:
            video.feedTimeOffset = strPublication
        # NOTE(review): without a pub-date the video keeps whatever feedTimeOffset Video
        # provides itself, like the videos built by getVideoContentInSection — confirm.
        return video, searchIndex

    def getVideoIdInSection(self, strInput, sectionName, searchIndex):
        """Extract the data-video-id value from the next <sectionName> section.

        strInput    -- markup to scan.
        sectionName -- tag name of the section to look for.
        searchIndex -- offset in strInput at which to start looking.

        Returns (videoId, nextSearchIndex). nextSearchIndex is -1 when no further complete
        section exists; videoId is None when there is no section or the section carries no
        data-video-id.
        """
        openingTag = '<' + sectionName
        closingTag = '</' + sectionName

        sectionStart = strInput.find(openingTag, searchIndex)
        if sectionStart == -1:
            return None, -1

        sectionEnd = strInput.find(closingTag, sectionStart)
        if sectionEnd == -1:
            return None, -1

        nextSearchIndex = sectionEnd + len(closingTag)
        sectionText = strInput[sectionStart:nextSearchIndex + 1]
        if not sectionText:
            return None, nextSearchIndex

        idPosition = sectionText.find("data-video-id")
        if idPosition == -1:
            return None, nextSearchIndex

        # The id is the first double-quoted value following the attribute name.
        return self.betweenString(sectionText[idPosition:], "\"", "\""), nextSearchIndex

    def getVideoContentInSection(self, strInput):
        """Build a Video from the "contentUrl", "description" and "thumbnailUrl" entries.

        strInput -- page text containing those quoted keys.

        Returns None when any of the three keys is missing. When a "duration" key is present
        and parseDuration yields two values, " - MM:SS" is appended to the description.
        """
        def quotedValueAfter(marker):
            # First double-quoted value following marker, stripped; None if marker is absent.
            position = strInput.find(marker)
            if position == -1:
                return None
            remainder = strInput[position + len(marker):]
            return self.betweenString(remainder, "\"", "\"").strip()

        contentUrl = quotedValueAfter("\"contentUrl\":")
        if contentUrl is None:
            return None

        description = quotedValueAfter("\"description\":")
        if description is None:
            return None

        thumbnail = quotedValueAfter("\"thumbnailUrl\":")
        if thumbnail is None:
            return None

        # The duration is optional; this key is searched without the trailing colon.
        duration = quotedValueAfter("\"duration\"")
        if duration is not None:
            minutes, seconds = parseDuration(duration)
            if minutes is not None and seconds is not None:
                description = description + " - " + minutes + ":" + seconds

        return Video(description.strip(), contentUrl, thumbnail)

    def betweenString(self, strItem, strBegin, strEnd ):
        """Convenience wrapper that forwards unchanged to StringHelper.betweenString."""
        extracted = StringHelper.betweenString(strItem, strBegin, strEnd)
        return extracted

    def removeHtml(self,strItem):
        """Replace a fixed set of HTML entities (and "???") with plain characters.

        Returns None when strItem is None, otherwise the cleaned string.
        """
        if strItem is None:
            return None
        # Applied top to bottom; "&amp;" is handled after the first two entities.
        substitutions = (
            ("&#x27;", "'"),
            ("&#187;", "'"),
            ("&amp;", "&"),
            ("&#x2018;", "'"),
            ("&#x2019;", "'"),
            ("&#x2014;", "-"),
            ("???", "'"),
        )
        cleaned = strItem
        for entity, plain in substitutions:
            cleaned = cleaned.replace(entity, plain)
        return cleaned

def pad(str,filler,length):
    """Left-pad str with one copy of filler per missing character up to length.

    A string already at least length characters long is returned unchanged.
    The first parameter keeps its original name even though it shadows the builtin.
    """
    missing = length - len(str)
    if missing <= 0:
        return str
    return filler * missing + str

def parseDuration(strDuration):
    """Split a duration such as 'PT24M5S' into zero-padded (minutes, seconds) strings.

    Returns (None, None) unless strDuration contains exactly two runs of digits.
    """
    numbers = re.findall(r"\d+", strDuration)
    if len(numbers) != 2:
        return None, None
    minutes, seconds = numbers
    return pad(minutes, '0', 2), pad(seconds, '0', 2)


# DON'T LEAVE ANYTHING UNCOMMENTED BELOW THIS LINE: THIS FILE IS IMPORTED BY OTHER MODULES, AND ANY CODE OUTSIDE A CLASS OR FUNCTION RUNS AT IMPORT TIME
# strdate = "January 1, 2026"
# if DateTimeHelper.canstrptimeex(strdate):
#   theDate  = DateTimeHelper.strptimeex(strdate)
#   if(not isinstance(theDate,datetime)):
#     raise Exception('Invalid type for parameter')


# feedTimeOffset = "January 13, 2025"
# currentTime = datetime.now()
# for i in range(1,100):
#    relativeTime = DateTimeHelper.applyRelativeTime(currentTime,feedTimeOffset)
#    print(relativeTime)

#print(FOX_NEWS_URL)
# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# newsFeed.archiveFile(pathFileName)


# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# modifiedTime=os.path.getmtime(pathFileName)
# convertTime=time.localtime(modifiedTime)
# formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
# fileDateTime=DateTimeHelper.strptime(formatTime)

#fileDateTime=datetime.strptime(formatTime,'%d%m%Y %H:%M:%S')
#fileDateTime2=datetime(*(time.strptime(formatTime,'%d%m%Y %H:%M:%S')[0:6]))
#currentTime=datetime.now()

#Test the main feed
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/')
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt')
# newsFeed=NewsFeed(PATH_VIDEO_DATABASE, myLog())
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/', myLog())
# videos=newsFeed.getItemsInFeed(FOX_NEWS_URL)
# for video in videos:
#  if(video.description.startswith("Martha")):
#     print(f"Description={video.description}")
#     print(f"Url={video.url}")
#     print(f"getTimestamp={video.getTimestamp().toStringMonthDay()}")
#     print(f"getFeedTimeOffset={video.getFeedTimeOffset()}")
#     print(f"getFeedTime={video.getFeedTime()}")
#     print(f"daysOld={(datetime.now()-video.getFeedTime()).days}")
#     print(' ')

# Pull the relative time out of the description and subtract it from the time we scanned the feed.
# The result is the time of the article; use it as the sort key.
# (e.g.) Feed time: 02/03/2023 12:00:00  Article time: 2 hours ago  Real time: 10:00:00


#Test the exclusive items feed
#newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
#videos=newsFeed.getExclusiveItemsInFeed("https://www.foxnews.com")
# for video in videos:
#     print(video.description)


# Test the U.S. Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getUSItemsInFeed("https://www.foxnews.com/video/topics/us")
# for video in videos:
#     print(video.description)

# Test the America's NewsRoom Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInAmericasNewsRoomFeed("https://www.foxnews.com/video/shows/americas-newsroom")
# print('got {count} videos for America''s Newsroom'.format(count=len(videos)))
# for video in videos:
#     print(video.description)
#     print(video.url)

# Test the Outnumbered Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInOutnumberedFeed("https://www.foxnews.com/video/shows/outnumbered")
# print('got {count} videos for Outnumbered'.format(count=len(videos)))
# for video in videos:
#     print(video.description)
#     print(video.url)

#minutes, seconds = parseDuration('PT24M5S')
#print('Duration is {minutes}:{seconds}'.format(minutes=minutes,seconds=seconds))

# isoDate="2022-10-27T10:24:11Z".replace("Z","+00:00")
# articleTime=datetime.datetime.fromisoformat(isoDate)
# print('time:{time}'.format(time=articleTime))
# currentTime=Date.getCurrentTime()
# print('time:{time}'.format(time=currentTime))
# days, hours, minutes, seconds=Date.deltaTime(articleTime,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))

# currentTime2=Date.getCurrentTime()
# strCurrentTime2=str(currentTime2)
# currentTime2=datetime.datetime.fromisoformat(strCurrentTime2)
# days, hours, minutes, seconds=Date.deltaTime(currentTime2,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))

# dateList=[]

# currentDate=Date()
# dateList.append(currentDate)
# currentDate2=Date()
# dateList.append(currentDate2)

# dateList.sort(key=lambda x:x.toString())
# for date in dateList:
#     print(date.toString())
# #print(dateList)