from collections import OrderedDict
from datetime import datetime
from typing import Optional


class CacheHandler:
    """Bounded, least-recently-used cache of page summaries keyed by URL.

    Each entry is a dict holding ``title``, ``text``, ``date`` and one
    ``summary_<type>`` slot per summary type (0, 50 and 100 are pre-created).
    The cache is seeded with one pinned demo entry that is never evicted.
    Hit and miss counters are kept for ``get_cache_stats``.
    """

    # URL of the demo entry seeded in ``__init__``; eviction never removes it.
    DEFAULT_URL = "https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/"

    def __init__(self, max_cache_size: int = 1000):
        """Create the cache and seed it with the pinned demo entry.

        Args:
            max_cache_size: Number of entries above which the oldest
                non-pinned entry is evicted on insertion.
        """
        # OrderedDict keeps usage order: the oldest entry is first, the most
        # recently used is last, so eviction is a cheap popitem(last=False).
        self.cache = OrderedDict()
        # NOTE: the seeded entry has no "text" key; readers must tolerate that.
        self.cache[self.DEFAULT_URL] = {
            "title": "Iker García-Ferrero | Personal Webpage",
            "date": datetime.now(),
            "summary_0": "Iker García-Ferrero es un candidato a PhD en Natural Language Processing (NLP) "
            "en la Universidad del País Vasco UPV/EHU, IXA Group y HiTZ Centro Vasco de Tecnología de la "
            "Lengua, financiado por una beca del Gobierno Vasco. "
            "En el pasado, ha realizado prácticas en Amazon y ha realizado una estancia "
            "de investigación en la Universidad de Pensilvania (EEUU)."
            "Sus investigaciones se centran en la creación de modelos y recursos para NLP en "
            "lenguas con pocos o ningún recurso disponible, utilizando técnicas de transferencia de "
            "datos y modelos. Recientemente también se ha especializado en el entrenamiento de LLMs",
            "summary_50": "Iker García-Ferrero es un candidato a PhD en NLP en la Universidad del País Vasco, "
            "con experiencia en Amazon, la Universidad de Pensilvania e HiTZ.",
            "summary_100": "Iker García-Ferrero es un candidato a PhD en NLP.",
        }
        self.max_cache_size = max_cache_size
        self.misses = 0
        self.hits = 0

    def add_to_cache(
        self, url: str, title: str, text: str, summary_type: int, summary: str
    ):
        """Store ``summary`` for ``url`` under the given summary type.

        If the URL is already cached, only the summary slot and the date are
        updated (the stored title and text are kept) and the entry becomes the
        most recently used. Otherwise a new entry is created. Afterwards the
        oldest non-pinned entries are evicted while the cache is over size.

        Args:
            url: Cache key.
            title: Page title, stored only when the entry is new.
            text: Page text, stored only when the entry is new.
            summary_type: Summary variant; stored under ``summary_<type>``.
            summary: The summary to store.
        """
        summary_key = f"summary_{summary_type}"
        now = datetime.now()

        entry = self.cache.get(url)
        if entry is not None:
            # Known URL: refresh it and mark it as the most recently used.
            self.cache.move_to_end(url)
            entry[summary_key] = summary
            entry["date"] = now
        else:
            entry = {
                "title": title,
                "text": text,
                "date": now,
                "summary_0": None,
                "summary_50": None,
                "summary_100": None,
            }
            # Assigning by key (rather than comparing against 0/50/100) keeps
            # this branch consistent with the update branch, so a non-standard
            # summary type is stored instead of being silently dropped.
            entry[summary_key] = summary
            self.cache[url] = entry

        self._evict_overflow()

    def _evict_overflow(self):
        """Drop the oldest non-pinned entries until the cache fits its limit."""
        while len(self.cache) > self.max_cache_size:
            if self.DEFAULT_URL in self.cache:
                if len(self.cache) == 1:
                    # Only the pinned demo entry is left; never evict it.
                    break
                # The pinned entry is the default value in the demo, so move
                # it out of the eviction position before popping.
                self.cache.move_to_end(self.DEFAULT_URL)
            self.cache.popitem(last=False)  # pop the oldest item

    def get_from_cache(
        self, url: str, summary_type: int, second_try: bool = False
    ) -> Optional[tuple]:
        """Look up the cached summary of the given type for ``url``.

        Args:
            url: Cache key.
            summary_type: Summary variant to read (``summary_<type>``).
            second_try: True when this lookup retries an earlier miss for the
                same request (e.g. after resolving a shortened URL).

        Returns:
            ``(title, text, summary)`` on a hit, where ``text`` is ``None`` if
            the entry has no stored text; ``(None, None, None)`` on a miss.
        """
        entry = self.cache.get(url)
        # .get() makes an unknown summary type a miss rather than a KeyError.
        summary = None if entry is None else entry.get(f"summary_{summary_type}")

        if summary is None:
            # A retry must not count the same request as a second miss.
            if not second_try:
                self.misses += 1
            return None, None, None

        # Move the accessed item to the end to mark it as recently used.
        self.cache.move_to_end(url)
        self.hits += 1
        if second_try:
            # In the first try we didn't get the cache hit, probably because
            # it was a shortened URL. So we decrease the number of misses,
            # because we got the cache hit in the end.
            self.misses -= 1

        # The seeded demo entry has no "text" key, hence .get().
        return entry["title"], entry.get("text"), summary

    def get_cache_stats(self):
        """Return ``(hits, misses, number_of_cached_entries)``."""
        return self.hits, self.misses, len(self.cache)