compare_cc_files_to_fs.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. import os
  2. import time
  3. import shutil
  4. import sys
  5. import logging
  6. from configobj import ConfigObj
  7. from subprocess import Popen, PIPE
  8. from api_clients import api_client as apc
  9. """
  10. The purpose of this script is that you can run it, and it will compare what the database has to what your filesystem
  11. has. It will then report if there are any differences. It will *NOT* make any changes, unlike media-monitor which uses
  12. similar code when it starts up (but then makes changes if something is different)
  13. """
  14. class AirtimeMediaMonitorBootstrap():
  15. """AirtimeMediaMonitorBootstrap constructor
  16. Keyword Arguments:
  17. logger -- reference to the media-monitor logging facility
  18. pe -- reference to an instance of ProcessEvent
  19. api_clients -- reference of api_clients to communicate with airtime-server
  20. """
  21. def __init__(self):
  22. config = ConfigObj('/etc/airtime/airtime.conf')
  23. self.api_client = apc.api_client_factory(config)
  24. """
  25. try:
  26. logging.config.fileConfig("logging.cfg")
  27. except Exception, e:
  28. print 'Error configuring logging: ', e
  29. sys.exit(1)
  30. """
  31. self.logger = logging.getLogger()
  32. self.logger.info("Adding %s on watch list...", "xxx")
  33. self.scan()
  34. """On bootup we want to scan all directories and look for files that
  35. weren't there or files that changed before media-monitor process
  36. went offline.
  37. """
  38. def scan(self):
  39. directories = self.get_list_of_watched_dirs();
  40. self.logger.info("watched directories found: %s", directories)
  41. for id, dir in directories.iteritems():
  42. self.logger.debug("%s, %s", id, dir)
  43. #CHANGED!!!
  44. #self.sync_database_to_filesystem(id, api_client.encode_to(dir, "utf-8"))
  45. self.sync_database_to_filesystem(id, dir)
  46. """Gets a list of files that the Airtime database knows for a specific directory.
  47. You need to provide the directory's row ID, which is obtained when calling
  48. get_list_of_watched_dirs function.
  49. dir_id -- row id of the directory in the cc_watched_dirs database table
  50. """
  51. def list_db_files(self, dir_id):
  52. return self.api_client.list_all_db_files(dir_id)
  53. """
  54. returns the path and the database row id for this path for all watched directories. Also
  55. returns the Stor directory, which can be identified by its row id (always has value of "1")
  56. """
  57. def get_list_of_watched_dirs(self):
  58. json = self.api_client.list_all_watched_dirs()
  59. return json["dirs"]
  60. def scan_dir_for_existing_files(self, dir):
  61. command = 'find "%s" -type f -iname "*.ogg" -o -iname "*.mp3" -readable' % dir.replace('"', '\\"')
  62. self.logger.debug(command)
  63. #CHANGED!!
  64. stdout = self.exec_command(command).decode("UTF-8")
  65. return stdout.splitlines()
  66. def exec_command(self, command):
  67. p = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
  68. stdout, stderr = p.communicate()
  69. if p.returncode != 0:
  70. self.logger.warn("command \n%s\n return with a non-zero return value", command)
  71. self.logger.error(stderr)
  72. return stdout
  73. """
  74. This function takes in a path name provided by the database (and its corresponding row id)
  75. and reads the list of files in the local file system. Its purpose is to discover which files
  76. exist on the file system but not in the database and vice versa, as well as which files have
  77. been modified since the database was last updated. In each case, this method will call an
  78. appropiate method to ensure that the database actually represents the filesystem.
  79. dir_id -- row id of the directory in the cc_watched_dirs database table
  80. dir -- pathname of the directory
  81. """
  82. def sync_database_to_filesystem(self, dir_id, dir):
  83. """
  84. set to hold new and/or modified files. We use a set to make it ok if files are added
  85. twice. This is because some of the tests for new files return result sets that are not
  86. mutually exclusive from each other.
  87. """
  88. db_known_files_set = set()
  89. files = self.list_db_files(dir_id)
  90. for file in files['files']:
  91. db_known_files_set.add(file)
  92. existing_files = self.scan_dir_for_existing_files(dir)
  93. existing_files_set = set()
  94. for file_path in existing_files:
  95. if len(file_path.strip(" \n")) > 0:
  96. existing_files_set.add(file_path[len(dir):])
  97. deleted_files_set = db_known_files_set - existing_files_set
  98. new_files_set = existing_files_set - db_known_files_set
  99. print ("DB Known files: \n%s\n\n"%len(db_known_files_set))
  100. print ("FS Known files: \n%s\n\n"%len(existing_files_set))
  101. print ("Deleted files: \n%s\n\n"%deleted_files_set)
  102. print ("New files: \n%s\n\n"%new_files_set)
  103. if __name__ == "__main__":
  104. AirtimeMediaMonitorBootstrap()