# Newsfeed/newsfeed.py

import json
import os
import webbrowser
import requests
import traceback
import time
import re
import glob
import shutil
from datetime import timedelta
from datetime import datetime
from datetime import timezone
from environment import *
from utility import *
from video import *

class NewsFeed:
    def __init__(self, pathDb, logger=None):
        """Store the cache location and the optional log sink.

        pathDb is the base path passed to PathHelper.makePathFileName whenever
        a cache file name is built. logger, when given, must expose a
        write(message) method; when it is None, writeLog prints instead.
        """
        self.logger = logger
        self.pathDb = pathDb

    @staticmethod
    def isResourceAvailable(url):
        try:
          response=requests.head(url, timeout=2.5)
          if not response.ok:
              return False
          return True
        except:
          return False

    def getItemsInAmericasNewsRoomFeed(self,url):
        """Return the videos of the America's Newsroom feed.

        The cache file VIDEODB_AMERICAS_NEWSROOM_FILENAME (resolved against
        self.pathDb) is returned when isFeedCacheAvailable accepts it for
        CACHE_EXPIRY_MINS and it can be read. Otherwise url is fetched, every
        "article" section is parsed with Sections.getItemsInSection, videos
        with an already-seen description are dropped, and the result is
        written back to the cache.

        Returns a list of videos sorted ascending by getFeedTime(), or None
        when the HTTP status is not 200.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME, self.pathDb)
            if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
                videos = self.readFeedCache(cachePathFileName)
                if videos is not None:
                    return videos
            sections = Sections()
            videos = {}
            httpNetRequest = HttpNetRequest()
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the body once instead of once per parsed article.
            # NOTE(review): presumably a requests.Response, whose .text
            # re-decodes the whole body on every access - confirm.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            searchIndex = 0
            while searchIndex != -1:
                video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=False)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            # 'is not None' avoids relying on the response's own comparison/truthiness.
            if response is not None:
                response.close()

    def getItemsInOutnumberedFeed(self,url):
        """Return the videos of the Outnumbered feed.

        The cache file VIDEODB_OUTNUMBERED_FILENAME (resolved against
        self.pathDb) is returned when isFeedCacheAvailable accepts it for
        CACHE_EXPIRY_MINS and it can be read. Otherwise url is fetched, every
        "article" section is parsed with Sections.getItemsInSection, videos
        with an already-seen description are dropped, and the result is
        written back to the cache.

        Returns a list of videos sorted descending by getFeedTime(), or None
        when the HTTP status is not 200.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME, self.pathDb)
            if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
                videos = self.readFeedCache(cachePathFileName)
                if videos is not None:
                    return videos
            sections = Sections()
            videos = {}
            httpNetRequest = HttpNetRequest()
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the body once instead of once per parsed article.
            # NOTE(review): presumably a requests.Response, whose .text
            # re-decodes the whole body on every access - confirm.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            searchIndex = 0
            while searchIndex != -1:
                video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            # 'is not None' avoids relying on the response's own comparison/truthiness.
            if response is not None:
                response.close()

    def getItemsInFeed(self,url):
        """Return the videos of the main feed.

        The cache file VIDEODB_FILENAME (resolved against self.pathDb) is
        returned when isFeedCacheAvailable accepts it for CACHE_EXPIRY_MINS
        and it can be read. Otherwise url is fetched, every "article" section
        is parsed with Sections.getItemsInSection, videos with an already-seen
        description are dropped, entries are filtered through
        filterFeedMaxDays with FEED_REJECT_IF_OLDER_THAN_DAYS, and the result
        is written back to the cache.

        Returns a list of videos sorted descending by getFeedTime(), or None
        when the HTTP status is not 200.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(VIDEODB_FILENAME, self.pathDb)
            if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
                self.writeLog(f"Loading videos from cache {cachePathFileName}")
                videos = self.readFeedCache(cachePathFileName)
                if videos is not None:
                    return videos
            sections = Sections()
            videos = {}
            httpNetRequest = HttpNetRequest()
            self.writeLog(f"Loading videos from {url}")
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the body once instead of once per parsed article.
            # NOTE(review): presumably a requests.Response, whose .text
            # re-decodes the whole body on every access - confirm.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            searchIndex = 0
            while searchIndex != -1:
                video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = self.filterFeedMaxDays(list(videos.values()), FEED_REJECT_IF_OLDER_THAN_DAYS)
            videoList = sorted(videoList, key=lambda x: x.getFeedTime(), reverse=True)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            # 'is not None' avoids relying on the response's own comparison/truthiness.
            if response is not None:
                response.close()

    def filterFeedMaxDays(self, videos, days):
       now = datetime.now()
       filteredList=[]
       for video in videos:
          delta = now - video.getFeedTime()
          if delta.days <= days:
             message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}',  description={video.description}"
             self.writeLog(message)
             filteredList.append(video)
          else:
             message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}',  description={video.description}"
             self.writeLog(message)
       return filteredList

    def getUSItemsInFeed(self,url):
        """Return the videos of the U.S. feed.

        The cache file VIDEODB_US_FILENAME (resolved against self.pathDb) is
        returned when isFeedCacheAvailable accepts it for CACHE_EXPIRY_MINS
        and it can be read. Otherwise url is fetched and, for every "article"
        section carrying a video id, the page
        https://video.foxnews.com/v/<id> is fetched and parsed with
        Sections.getVideoContentInSection. Videos with an already-seen
        description are dropped and the result is written back to the cache.

        Returns a list of videos sorted descending by getFeedTime(), or None
        when the HTTP status of the feed page is not 200. Video pages that do
        not answer 200 are skipped.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(VIDEODB_US_FILENAME, self.pathDb)
            if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
                videos = self.readFeedCache(cachePathFileName)
                if videos is not None:
                    return videos
            sections = Sections()
            videos = {}
            httpNetRequest = HttpNetRequest()
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the feed body once instead of once per parsed article.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            searchIndex = 0
            while searchIndex != -1:
                videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
                if videoId is None:
                    continue
                videoUrl = 'https://video.foxnews.com/v/' + videoId
                innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
                try:
                    if innerResponse.status_code != 200:
                        continue
                    # The body must be read BEFORE the response is closed; the
                    # original read .text after close(), which only works if
                    # the body happened to be buffered already.
                    innerText = innerResponse.text
                finally:
                    # Runs on every path (including 'continue' and exceptions),
                    # so the per-video response can no longer leak.
                    innerResponse.close()
                video = sections.getVideoContentInSection(innerText)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            if response is not None:
                response.close()

    def getExclusiveItemsInFeed(self,url):
        """Return the videos of the exclusive-items feed.

        The cache file VIDEODB_EXCLUSIVE_FILENAME (resolved against
        self.pathDb) is returned when isFeedCacheAvailable accepts it for
        CACHE_EXPIRY_MINS and it can be read. Otherwise url is fetched and,
        for every "article" section carrying a video id, the page
        https://video.foxnews.com/v/<id> is fetched and parsed with
        Sections.getVideoContentInSection. Videos with an already-seen
        description are dropped and the result is written back to the cache.

        Returns a list of videos sorted descending by getFeedTime(), or None
        when the HTTP status of the feed page is not 200. Video pages that do
        not answer 200 are skipped.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME, self.pathDb)
            if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
                videos = self.readFeedCache(cachePathFileName)
                if videos is not None:
                    return videos
            sections = Sections()
            videos = {}
            httpNetRequest = HttpNetRequest()
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the feed body once instead of once per parsed article.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            searchIndex = 0
            while searchIndex != -1:
                videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
                if videoId is None:
                    continue
                videoUrl = 'https://video.foxnews.com/v/' + videoId
                innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
                try:
                    if innerResponse.status_code != 200:
                        continue
                    # The body must be read BEFORE the response is closed; the
                    # original read .text after close(), which only works if
                    # the body happened to be buffered already.
                    innerText = innerResponse.text
                finally:
                    # Runs on every path (including 'continue' and exceptions),
                    # so the per-video response can no longer leak.
                    innerResponse.close()
                video = sections.getVideoContentInSection(innerText)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = sorted(videos.values(), key=lambda x: x.getFeedTime(), reverse=True)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            if response is not None:
                response.close()

    def getItemsInArchiveFeed(self,url,archiveDbFileName):
        """Load the videos stored in an archive cache file.

        archiveDbFileName is resolved against self.pathDb with
        PathHelper.makePathFileName. url is accepted but not used here.
        Returns whatever readFeedCache returns: the list of videos, or None
        when the file could not be read.
        """
        archivePath = PathHelper.makePathFileName(archiveDbFileName, self.pathDb)
        # readFeedCache already yields None on failure, so no extra branch is needed.
        return self.readFeedCache(archivePath)

    def readFeedCache(self,pathFileName):
        try:
          videos=[]
# 'with' will automatically close the stream
          with open(pathFileName,"r",encoding='utf-8') as inputStream:
            for line in inputStream:
                video=Video.fromString(line)
                videos.append(video)
          return(videos)
        except:
          self.writeLog(traceback.format_exc())
          return(None)

    def writeFeedCache(self,pathFileName,videos):
        try:
          with open(pathFileName,"w",encoding='utf-8') as outputStream:
            for video in videos:
                outputStream.write(video.toString()+"\n")
# 'with' will automatically close the stream
          return(videos)
        except:
          self.writeLog(traceback.format_exc())
          return(videos)

    def isFeedCacheAvailable(self, pathFileName, expireMinutes):
        try:
            self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
            if not os.path.isfile(pathFileName):
                return False
            modifiedTime = os.path.getmtime(pathFileName)
            convertTime = time.localtime(modifiedTime)
            formatTime = time.strftime('%d%m%Y %H:%M:%S', convertTime)
            fileDateTime = time.strptime(formatTime, '%d%m%Y %H:%M:%S')
            currentTime = datetime.now()
            elapsed = currentTime - datetime(*(fileDateTime[0:6]))
            totalSeconds = int(elapsed.total_seconds())
            hours, remainder = divmod(totalSeconds, 3600)
            minutes, _ = divmod(remainder, 60)
            self.writeLog('file is = "{age}" hours old'.format(age=hours))
            self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
            if hours > 1 or minutes > expireMinutes:
                self.archiveFile(pathFileName)
                return False
            return True
        except:
            self.writeLog(traceback.format_exc())
            return False

    def archiveFile(self, pathFileName):
        """Move pathFileName aside to the next free '<name>.txt.<n>' archive name.

        The archive stem is whatever StringHelper.betweenString(pathFileName,
        None, '.txt') returns (presumably the part of the path before '.txt'
        - confirm against StringHelper). The file is copied to the archive
        name and the original is then removed.

        Returns False when pathFileName is not an existing file, True after
        the file has been archived. Copy/remove errors propagate to the caller.
        """
        if not os.path.isfile(pathFileName):
            return False
        archiveFile = StringHelper.betweenString(pathFileName, None, '.txt')
        archivePrefix = archiveFile + '.txt.'
        # glob.escape keeps characters such as '[' or '*' in the path from
        # being interpreted as wildcards.
        existing = glob.glob(glob.escape(archivePrefix) + '*')
        index = len(existing) + 1
        # Counting files is not enough when earlier archives were deleted
        # (e.g. only .1 and .3 remain -> count+1 == 3 would overwrite .3),
        # so advance until the name is really free.
        while os.path.exists(archivePrefix + str(index)):
            index += 1
        archiveFileName = archivePrefix + str(index)
        # Routed through writeLog like every other message of this class;
        # with no logger configured this still prints.
        self.writeLog('archiveFile:  Copying "{pathFileName}" to "{archiveFileName}".'.format(pathFileName=pathFileName,archiveFileName=archiveFileName))
        shutil.copy(pathFileName, archiveFileName)
        os.remove(pathFileName)
        return True

    def writeLog(self,message):
        if self.logger is not None:
            self.logger.write(message)
        else:
            print(message)

class Sections:
    def __init__(self):
        """Create a parser; the only attribute set is the placeholder `dummy`."""
        self.dummy = None

    def getItemsInSection(self, strInput, sectionName, searchIndex):
        """Parse the next <sectionName ...> ... </sectionName section into a Video.

        Scanning starts at searchIndex. Returns (video, nextIndex):
        nextIndex is -1 when no further complete section exists, otherwise the
        position just past the closing tag name. video is None when the
        section has no 'preview="' or 'alt="' attribute, when the preview URL
        contains "tokenvod", or when the description contains
        "vod.foxbusiness".

        The description is built from the alt text (HTML entities replaced,
        "- Fox News" removed), followed by " - <duration>" and
        " (<publication>)" when those divs are present. The publication text
        is also stored on video.feedTimeOffset.
        """
        openTag = '<' + sectionName
        closeTag = '</' + sectionName

        sectionStart = strInput.find(openTag, searchIndex)
        if sectionStart == -1:
            return None, -1

        sectionEnd = strInput.find(closeTag, sectionStart)
        if sectionEnd == -1:
            return None, -1

        nextIndex = sectionEnd + len(closeTag)
        # One extra character so the slice includes the character after the tag name.
        section = strInput[sectionStart:nextIndex + 1]
        if not section:
            return None, nextIndex

        # Preview URL
        previewPos = section.find("preview=\"")
        if previewPos == -1:
            return None, nextIndex
        previewUrl = self.betweenString(section[previewPos:], '"', '"')
        if "tokenvod" in previewUrl:
            return None, nextIndex

        # Description
        altPos = section.find("alt=\"")
        if altPos == -1:
            return None, nextIndex
        description = self.betweenString(section[altPos:], '"', '"')
        description = self.removeHtml(description).replace("- Fox News", "")
        if "vod.foxbusiness" in description:
            return None, nextIndex

        # Duration
        durationPos = section.find("<div class=\"duration\">")
        if durationPos != -1:
            durationText = self.betweenString(section[durationPos:], ">", "<")
            description = description + " - " + durationText

        # Publication
        publicationText = ""
        publicationPos = section.find("<div class=\"pub-date\">")
        if publicationPos != -1:
            publicationText = self.betweenString(section[publicationPos:], "<time>", "</time>")
            description = description + " (" + publicationText + ")"

        # Icon: last entry of the srcset list, cut at the query string
        iconUrl = None
        srcsetPos = section.find("srcset=")
        if srcsetPos != -1:
            srcset = self.betweenString(section[srcsetPos:], "\"", "\"")
            lastCandidate = srcset.split(',')[-1]
            iconUrl = self.betweenString(lastCandidate, None, '?').strip()

        result = Video(description.strip(), previewUrl, iconUrl)
        result.feedTimeOffset = publicationText
        return result, nextIndex

    def getVideoIdInSection(self, strInput, sectionName, searchIndex):
        """Extract the data-video-id value of the next <sectionName> section.

        Scanning starts at searchIndex. Returns (videoId, nextIndex):
        nextIndex is -1 when no further complete section exists, otherwise the
        position just past the closing tag name. videoId is None when the
        section carries no "data-video-id" marker; otherwise it is the text
        betweenString extracts between double quotes after the marker.
        """
        openTag = '<' + sectionName
        closeTag = '</' + sectionName

        sectionStart = strInput.find(openTag, searchIndex)
        if sectionStart == -1:
            return None, -1

        sectionEnd = strInput.find(closeTag, sectionStart)
        if sectionEnd == -1:
            return None, -1

        nextIndex = sectionEnd + len(closeTag)
        section = strInput[sectionStart:nextIndex + 1]
        if not section:
            return None, nextIndex

        markerPos = section.find("data-video-id")
        if markerPos == -1:
            return None, nextIndex
        return self.betweenString(section[markerPos:], "\"", "\""), nextIndex

    def getVideoContentInSection(self, strInput):
        """Build a Video from the "contentUrl"/"description"/"thumbnailUrl" keys in strInput.

        For each key the first occurrence is located and the value is taken
        with betweenString between double quotes, then stripped. Returns None
        as soon as one of the three keys is missing. When a "duration" key is
        present and parseDuration yields minutes and seconds, " - MM:SS" is
        appended to the description.
        """
        values = []
        # Same lookup for the three mandatory keys, in the original order.
        for key in ("\"contentUrl\":", "\"description\":", "\"thumbnailUrl\":"):
            keyPos = strInput.find(key)
            if keyPos == -1:
                return None
            rawValue = self.betweenString(strInput[keyPos + len(key):], "\"", "\"")
            values.append(rawValue.strip())
        contentUrl, description, iconUrl = values

        durationKey = "\"duration\""
        durationPos = strInput.find(durationKey)
        if durationPos != -1:
            rawDuration = self.betweenString(strInput[durationPos + len(durationKey):], "\"", "\"")
            minutes, seconds = parseDuration(rawDuration.strip())
            if minutes is not None and seconds is not None:
                description = description + " - " + minutes + ":" + seconds

        return Video(description.strip(), contentUrl, iconUrl)

    def betweenString(self, strItem, strBegin, strEnd ):
        """Delegate to StringHelper.betweenString with the same three arguments."""
        extracted = StringHelper.betweenString(strItem, strBegin, strEnd)
        return extracted

    def removeHtml(self,strItem):
        """Replace a fixed set of HTML entities (and "???") with plain characters.

        Returns None for None input. The replacements are applied in order,
        so text produced by an earlier step can be changed by a later one.
        """
        if strItem is None:
            return None
        replacements = (
            ("&#x27;", "'"),
            ("&#187;", "'"),
            ("&amp;", "&"),
            ("&#x2018;", "‘"),
            ("&#x2019;", "’"),
            ("&#x2014;", "-"),
            ("&#39;", "'"),
            ("???", "'"),
            ("&quot;", "\""),
        )
        for entity, plain in replacements:
            strItem = strItem.replace(entity, plain)
        return strItem

def pad(str,filler,length):
    """Left-pad str with one copy of filler per missing character up to length.

    Strings that are already at least `length` long are returned unchanged.
    """
    missing = length - len(str)
    if missing <= 0:
        return str
    return filler * missing + str

def parseDuration(strDuration):
    """Split a duration string such as 'PT24M5S' into zero-padded minutes and seconds.

    Exactly two groups of digits must be present; otherwise (None, None) is
    returned. Each group is left-padded with '0' to two characters.
    """
    numbers = re.findall(r"\d+", strDuration)
    if len(numbers) != 2:
        return None, None
    minutes, seconds = (pad(number, '0', 2) for number in numbers)
    return minutes, seconds


# KEEP EVERYTHING BELOW THIS LINE COMMENTED OUT: THIS FILE IS IMPORTED BY OTHER MODULES, AND ANY UNCOMMENTED MODULE-LEVEL CODE WILL RUN AT IMPORT TIME.
# strdate = "January 1, 2026"
# if DateTimeHelper.canstrptimeex(strdate):
#   theDate  = DateTimeHelper.strptimeex(strdate)
#   if(not isinstance(theDate,datetime)):
#     raise Exception('Invalid type for parameter')


# feedTimeOffset = "January 13, 2025"
# currentTime = datetime.now()
# for i in range(1,100):
#    relativeTime = DateTimeHelper.applyRelativeTime(currentTime,feedTimeOffset)
#    print(relativeTime)

#print(FOX_NEWS_URL)
# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# newsFeed.archiveFile(pathFileName)


# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# modifiedTime=os.path.getmtime(pathFileName)
# convertTime=time.localtime(modifiedTime)
# formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
# fileDateTime=DateTimeHelper.strptime(formatTime)

#fileDateTime=datetime.strptime(formatTime,'%d%m%Y %H:%M:%S')
#fileDateTime2=datetime(*(time.strptime(formatTime,'%d%m%Y %H:%M:%S')[0:6]))
#currentTime=datetime.now()

#Test the main feed
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/')
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt')
# newsFeed=NewsFeed(PATH_VIDEO_DATABASE, myLog())
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/', myLog())
# videos=newsFeed.getItemsInFeed(FOX_NEWS_URL)
# for video in videos:
#  if(video.description.startswith("Martha")):
#     print(f"Description={video.description}")
#     print(f"Url={video.url}")
#     print(f"getTimestamp={video.getTimestamp().toStringMonthDay()}")
#     print(f"getFeedTimeOffset={video.getFeedTimeOffset()}")
#     print(f"getFeedTime={video.getFeedTime()}")
#     print(f"daysOld={(datetime.now()-video.getFeedTime()).days}")
#     print(' ')

# Pull the relative time out of the description and subtract it from the time the feed was scanned.
# The result is the article's publication time; use it as the sort key.
# (e.g.) FeedTime:02/03/2023 12:00:00  Article Time:2 hours ago  Real time:10:00:00


#Test the exclusive items feed
#newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
#videos=newsFeed.getExclusiveItemsInFeed("https://www.foxnews.com")
# for video in videos:
#     print(video.description)


# Test the U.S. Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getUSItemsInFeed("https://www.foxnews.com/video/topics/us")
# for video in videos:
#     print(video.description)

# Test the America's NewsRoom Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInAmericasNewsRoomFeed("https://www.foxnews.com/video/shows/americas-newsroom")
# print("got {count} videos for America's Newsroom".format(count=len(videos)))
# for video in videos:
#     print(video.description)
#     print(video.url)

# Test the Outnumbered Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInOutnumberedFeed("https://www.foxnews.com/video/shows/outnumbered")
# print('got {count} videos for Outnumbered'.format(count=len(videos)))
# for video in videos:
#     print(video.description)
#     print(video.url)

#minutes, seconds = parseDuration('PT24M5S')
#print('Duration is {minutes}:{seconds}'.format(minutes=minutes,seconds=seconds))

# isoDate="2022-10-27T10:24:11Z".replace("Z","+00:00")
# articleTime=datetime.datetime.fromisoformat(isoDate)
# print('time:{time}'.format(time=articleTime))
# currentTime=Date.getCurrentTime()
# print('time:{time}'.format(time=currentTime))
# days, hours, minutes, seconds=Date.deltaTime(articleTime,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))

# currentTime2=Date.getCurrentTime()
# strCurrentTime2=str(currentTime2)
# currentTime2=datetime.datetime.fromisoformat(strCurrentTime2)
# days, hours, minutes, seconds=Date.deltaTime(currentTime2,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))

# dateList=[]

# currentDate=Date()
# dateList.append(currentDate)
# currentDate2=Date()
# dateList.append(currentDate2)

# dateList.sort(key=lambda x:x.toString())
# for date in dateList:
#     print(date.toString())
# #print(dateList)