utils_others.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import re
  2. import time
  3. import json
  4. import socket
  5. import logging
  6. import argparse
  7. import numbers
  8. import datetime
  9. def print_with_no(obj):
  10. if hasattr(obj, '__len__'):
  11. for k, item in enumerate(obj):
  12. print('[{}/{}] {}'.format(k+1, len(obj), item))
  13. elif hasattr(obj, '__iter__'):
  14. for k, item in enumerate(obj):
  15. print('[{}] {}'.format(k+1, item))
  16. else:
  17. print('[1] {}'.format(obj))
  18. def get_file_line_count(filename):
  19. line_count = 0
  20. buffer_size = 1024 * 1024 * 8
  21. with open(filename, 'r') as f:
  22. while True:
  23. data = f.read(buffer_size)
  24. if not data:
  25. break
  26. line_count += data.count('\n')
  27. return line_count
  28. def get_host_ip():
  29. try:
  30. s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  31. s.connect(('8.8.8.8', 80))
  32. ip = s.getsockname()[0]
  33. finally:
  34. s.close()
  35. return ip
  36. class ContextTimer(object):
  37. """
  38. References:
  39. WithTimer in https://github.com/uber/ludwig/blob/master/ludwig/utils/time_utils.py
  40. """
  41. def __init__(self, name=None, use_log=False, quiet=False):
  42. self.use_log = use_log
  43. self.quiet = quiet
  44. if name is None:
  45. self.name = ''
  46. else:
  47. self.name = '{}, '.format(name.rstrip())
  48. def __enter__(self):
  49. self.start_time = time.time()
  50. if not self.quiet:
  51. self._print_or_log('{}{} starts'.format(self.name, self._now_time_str))
  52. return self
  53. def __exit__(self, exc_type, exc_val, exc_tb):
  54. if not self.quiet:
  55. self._print_or_log('{}elapsed_time = {:.5}s'.format(self.name, self.get_eplased_time()))
  56. self._print_or_log('{}{} ends'.format(self.name, self._now_time_str))
  57. @property
  58. def _now_time_str(self):
  59. return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
  60. def _print_or_log(self, output_str):
  61. if self.use_log:
  62. logging.info(output_str)
  63. else:
  64. print(output_str)
  65. def get_eplased_time(self):
  66. return time.time() - self.start_time
  67. def enter(self):
  68. """Manually trigger enter"""
  69. self.__enter__()
  70. def set_logger(filename, level=logging.INFO, logger_name=None):
  71. logger = logging.getLogger(logger_name)
  72. logger.setLevel(level)
  73. # Never mutate (insert/remove elements) the list you're currently iterating on.
  74. # If you need, make a copy.
  75. for handler in logger.handlers[:]:
  76. if isinstance(handler, logging.FileHandler):
  77. logger.removeHandler(handler)
  78. # FileHandler is subclass of StreamHandler, so isinstance(handler,
  79. # logging.StreamHandler) is True even if handler is FileHandler.
  80. # if (type(handler) == logging.StreamHandler) and (handler.stream == sys.stderr):
  81. elif type(handler) == logging.StreamHandler:
  82. logger.removeHandler(handler)
  83. file_handler = logging.FileHandler(filename)
  84. file_handler.setFormatter(logging.Formatter('%(message)s'))
  85. logger.addHandler(file_handler)
  86. console_handler = logging.StreamHandler()
  87. console_handler.setFormatter(logging.Formatter('%(message)s'))
  88. logger.addHandler(console_handler)
  89. return logger
  90. def print_arguments(args):
  91. assert isinstance(args, argparse.Namespace)
  92. arg_list = sorted(vars(args).items())
  93. for key, value in arg_list:
  94. print('{}: {}'.format(key, value))
  95. def save_arguments(filename, args, sort=True):
  96. assert isinstance(args, argparse.Namespace)
  97. args = vars(args)
  98. with open(filename, 'w') as f:
  99. json.dump(args, f, indent=4, sort_keys=sort)
  100. def strip_content_in_paren(string):
  101. """
  102. Notes:
  103. strip_content_in_paren cannot process nested paren correctly
  104. """
  105. return re.sub(r"\([^)]*\)|([^)]*)", "", string)
  106. def _to_timestamp(val):
  107. if val is None:
  108. timestamp = time.time()
  109. elif isinstance(val, numbers.Real):
  110. timestamp = float(val)
  111. elif isinstance(val, time.struct_time):
  112. timestamp = time.mktime(val)
  113. elif isinstance(val, datetime.datetime):
  114. timestamp = val.timestamp()
  115. elif isinstance(val, datetime.date):
  116. dt = datetime.datetime.combine(val, datetime.time())
  117. timestamp = dt.timestamp()
  118. elif isinstance(val, str):
  119. try:
  120. # The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'.
  121. dt = datetime.datetime.fromisoformat(val)
  122. timestamp = dt.timestamp()
  123. except:
  124. raise TypeError('when argument is str, it should conform to isoformat')
  125. else:
  126. raise TypeError('unsupported type!')
  127. return timestamp
  128. def get_timestamp(time_val=None, rounded=True):
  129. """timestamp in seconds
  130. """
  131. timestamp = _to_timestamp(time_val)
  132. if rounded:
  133. timestamp = round(timestamp)
  134. return timestamp
  135. def get_timestamp_ms(time_val=None, rounded=True):
  136. """timestamp in milliseconds
  137. """
  138. timestamp = _to_timestamp(time_val) * 1000
  139. if rounded:
  140. timestamp = round(timestamp)
  141. return timestamp
  142. def get_utc8now():
  143. tz = datetime.timezone(datetime.timedelta(hours=8))
  144. utc8now = datetime.datetime.now(tz)
  145. return utc8now