

import re;


class return_visitors:
	"""Count the IP addresses that were seen on more than ``n`` distinct days.

	Lines are fed in one at a time through process_line(); end() computes
	the final count, which result() then returns.  The driver that calls
	begin() / process_line() / end() is not part of this file.

	The parsing assumes whitespace-separated log lines whose first field is
	the client IP and whose fourth field is a bracketed timestamp such as
	"[10/Oct/2000:13:55:36" -- presumably Apache/NCSA common log format;
	confirm against the caller.
	"""

	def __init__(self, n):
		"""Create the counter.

		n -- threshold; an IP is counted when it was seen on strictly more
		     than n distinct days.
		"""
		self.m_n = n
		# Maps IP -> list of distinct day strings, in first-seen order.
		self.m_ip_days = {}
		# Defined up front so result() is safe to call before end().
		self.m_count = 0

	def begin(self):
		"""Hook invoked before the first line; nothing to set up here."""

	def process_line(self, s):
		"""Record the day on which the IP in log line ``s`` was seen.

		Lines with fewer than four whitespace-separated fields are skipped
		silently.
		"""
		fields = s.split()
		try:
			ip = fields[0]
			# Characters 1..6 of the fourth field, i.e. "10/Oct" for a
			# field like "[10/Oct/2000:13:55:36".
			# NOTE(review): the key carries no year, so the same date in
			# two different years counts as a single day -- confirm that
			# is acceptable for the logs being analysed.
			day = fields[3][1:7]
		except IndexError:
			# Malformed / short line: best-effort parsing, ignore it.
			return

		# "in" + setdefault instead of dict.has_key(), which no longer
		# exists on Python 3.
		days = self.m_ip_days.setdefault(ip, [])
		if day not in days:
			days.append(day)

	def end(self):
		"""Compute how many IPs were seen on more than ``n`` distinct days."""
		self.m_count = sum(
			1 for days in self.m_ip_days.values() if len(days) > self.m_n
		)

	def description(self):
		"""Return a one-line label for the value produced by result()."""
		return "# of IP addresses that visited more than %s days" % self.m_n

	def result(self):
		"""Return the count computed by end() (0 if end() has not run)."""
		return self.m_count



class referring_domains:
	"""Tally how often each referring domain appears in the processed lines.

	Lines are fed in one at a time through process_line(); result() renders
	the tally as text, most frequent domain first.  The driver that calls
	begin() / process_line() / end() is not part of this file.

	The referrer is taken from the eleventh whitespace-separated field of
	each line -- presumably Apache/NCSA combined log format; confirm against
	the caller.
	"""

	# "//" + host + "." + a 2-3 letter top-level domain + "/".
	# Raw string so that "\-" and "\." reach the regex engine unchanged, and
	# "A-Z" rather than the original "A-z", which also accepted the
	# punctuation characters [ \ ] ^ _ ` as top-level-domain letters.
	# NOTE(review): domains whose top-level part is longer than three
	# letters, and referrers with no "/" after the host, do not match and
	# are therefore not counted -- confirm whether that is intended.
	_DOMAIN_RE = re.compile(r'//[a-zA-Z0-9\-\.]*\.[a-zA-Z]{2,3}/')

	def __init__(self):
		# Maps domain -> number of lines that referred from it.
		self.m_domains = {}

	def begin(self):
		"""Hook invoked before the first line; nothing to set up here."""

	def process_line(self, line):
		"""Count the referring domain found in ``line``, if there is one.

		Lines with fewer than eleven fields, or whose referrer field holds
		no recognisable domain, are skipped silently.
		"""
		fields = line.split()
		try:
			referrer = fields[10]
		except IndexError:
			# Short / malformed line: best-effort parsing, ignore it.
			return

		match = self._DOMAIN_RE.search(referrer)
		# Explicit None check instead of catching AttributeError, which
		# would also hide unrelated attribute errors inside this method.
		if match is None:
			return

		# Strip the leading "//" and the trailing "/".
		domain = match.group(0)[2:-1]
		self.m_domains[domain] = self.m_domains.get(domain, 0) + 1

	def end(self):
		"""Hook invoked after the last line; nothing to finalise here."""

	def description(self):
		"""Return a one-line label for the value produced by result()."""
		return "Referring domains"

	def sort(self, key1, key2):
		"""Compare two domains so that the higher count sorts first.

		Returns -1 when key1 has more hits than key2, 1 when it has fewer,
		and 0 when the counts are equal.
		"""
		hits1 = self.m_domains[key1]
		hits2 = self.m_domains[key2]
		if hits1 > hits2:
			return -1
		if hits1 < hits2:
			return 1
		return 0

	def result(self):
		"""Return the tally as text: one "domain count" line per domain,
		ordered by descending count, followed by two blank lines.
		"""
		# Function-scope import: the file's import block is left untouched.
		from functools import cmp_to_key

		# list.sort() no longer accepts a comparison function on Python 3,
		# so the comparator is adapted into a key function.
		ordered = sorted(self.m_domains, key=cmp_to_key(self.sort))
		rows = ["%s %s\n" % (domain, self.m_domains[domain])
			for domain in ordered]
		return "".join(rows) + "\n\n"
