Skip to content

lab

toolkitx.lab

Experimental and laboratory features for ToolkitX.

This subpackage contains experimental tools, research scripts, and other features that are still in the testing or refinement phase.

Classes

Translator

A translator implementation using py-transgpt with disk-based caching. Supports Baidu and Tencent translation engines.

Source code in toolkitx/lab/translator.py
class Translator:
    """
    A translator implementation using py-transgpt with disk-based caching.
    Supports Baidu and Tencent translation engines.

    Every translation is looked up in the cache first and only sent to the
    engine on a miss. Instances can be used as context managers, in which
    case the cache is closed when the ``with`` block exits.
    """

    def __init__(
        self,
        engine: SUPPORTED_ENGINES,
        cache_path: str,
        api_id: str | None = None,
        api_key: str | None = None,
        target_lang: str = "en",  # Default target language
        source_lang: str = "auto",  # Default source language
    ):
        """
        Initialize the Translator.

        Arguments are validated and the engine client is built *before* the
        cache is opened, so a failed construction does not leave a cache
        handle behind.

        :param engine: The translation engine to use ('baidu' or 'tencent').
        :param cache_path: Path to the directory for storing the translation cache.
        :param api_id: API ID for the translation service. If None (or empty), falls back to
                       the BAIDU_API_ID / TENCENT_API_ID environment variable.
                       For Tencent, this is SecretId.
        :param api_key: API Key for the translation service. If None (or empty), falls back to
                        the BAIDU_API_KEY / TENCENT_API_KEY environment variable.
                        For Tencent, this is SecretKey.
        :param target_lang: Default target language for translations.
        :param source_lang: Default source language for translations.
        :raises ValueError: If the engine is not supported or API credentials are not found.
        """
        if engine not in ["baidu", "tencent"]:
            raise ValueError(
                f"Unsupported engine: {engine}. Supported engines are 'baidu', 'tencent'."
            )

        self.engine_name = engine
        self.default_target_lang = target_lang
        self.default_source_lang = source_lang

        # Only the environment variable names and the wording of the error
        # message differ between the two engines.
        if engine == "baidu":
            env_api_id_name = "BAIDU_API_ID"
            env_api_key_name = "BAIDU_API_KEY"
            credential_label = "Baidu API ID and API Key"
        else:
            env_api_id_name = "TENCENT_API_ID"
            env_api_key_name = "TENCENT_API_KEY"
            credential_label = "Tencent Secret ID and Secret Key"

        # Explicit arguments win; the environment is only a fallback.
        _api_id = api_id or os.getenv(env_api_id_name)
        _api_key = api_key or os.getenv(env_api_key_name)
        if not _api_id or not _api_key:
            raise ValueError(
                f"{credential_label} are required. "
                f"Provide them as arguments or set {env_api_id_name} and {env_api_key_name} environment variables."
            )

        engine_cls = BaiduTranslation if engine == "baidu" else TencentTranslation
        self.translator_instance = engine_cls(api_id=_api_id, api_key=_api_key)

        # Opened last on purpose: nothing above can fail once the cache exists.
        # NOTE(review): Cache presumably creates on-disk state at cache_path
        # as soon as it is constructed - confirm against the cache library.
        self.cache = Cache(cache_path)

    def __enter__(self) -> "Translator":
        """Return the translator itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the cache on leaving the ``with`` block; exceptions are not suppressed."""
        self.close_cache()

    def _create_cache_key(self, text: str, target_lang: str, source_lang: str) -> str:
        """
        Helper to create a unique cache key.

        The key has the form ``engine:source:target:md5(text)``; the text is
        hashed (as UTF-8) so the key length does not grow with the input.
        """
        return f"{self.engine_name}:{source_lang}:{target_lang}:{hashlib.md5(text.encode('utf8')).hexdigest()}"

    def translate(
        self, text: str, target_lang: str | None = None, source_lang: str | None = None
    ) -> str:
        """
        Translate text using the configured engine and cache.

        :param text: The text to translate. Empty text returns "" without
                     touching the cache or the engine.
        :param target_lang: The target language code. Defaults to instance's default_target_lang.
        :param source_lang: The source language code. Defaults to instance's default_source_lang.
        :return: The translated text (from the cache when available).
        :raises Exception: Any error raised while translating or caching is
                           logged as a warning and re-raised unchanged.
        """
        _target_lang = target_lang or self.default_target_lang
        _source_lang = source_lang or self.default_source_lang

        if not text:
            return ""

        cache_key = self._create_cache_key(text, _target_lang, _source_lang)
        cached_translation = self.cache.get(cache_key)

        if cached_translation is not None:
            return cached_translation

        try:
            # py-transgpt's translate method takes (text, target_language, source_language)
            translated_text = self.translator_instance.translate(
                text, _target_lang, _source_lang
            )
            # Falsy results (None / "") are returned but never cached, so a
            # silent engine failure is retried on the next call.
            if translated_text:
                self.cache.set(cache_key, translated_text)
            return translated_text
        except Exception as e:
            logger.warning(f"Error during translation with {self.engine_name}: {e}")
            raise  # Re-raise the exception to make the caller aware

    def clear_cache(self) -> None:
        """
        Clear all items from the translation cache for this translator instance.
        """
        self.cache.clear()

    def close_cache(self) -> None:
        """
        Close the cache. Call this when done with the translator unless the
        translator is used in a ``with`` block, which closes the cache itself.
        """
        self.cache.close()
Functions
__init__(engine, cache_path, api_id=None, api_key=None, target_lang='en', source_lang='auto')

Initialize the Translator.

:param engine: The translation engine to use ('baidu' or 'tencent').
:param cache_path: Path to the directory for storing the translation cache.
:param api_id: API ID for the translation service. If None, attempts to load from ENV. For Tencent, this is SecretId.
:param api_key: API Key for the translation service. If None, attempts to load from ENV. For Tencent, this is SecretKey.
:param target_lang: Default target language for translations.
:param source_lang: Default source language for translations.
:raises ValueError: If the engine is not supported or API credentials are not found.

Source code in toolkitx/lab/translator.py
def __init__(
    self,
    engine: SUPPORTED_ENGINES,
    cache_path: str,
    api_id: str | None = None,
    api_key: str | None = None,
    target_lang: str = "en",  # Default target language
    source_lang: str = "auto",  # Default source language
):
    """
    Initialize the Translator.

    Arguments are validated and the engine client is built *before* the
    cache is opened, so a failed construction does not leave a cache
    handle behind.

    :param engine: The translation engine to use ('baidu' or 'tencent').
    :param cache_path: Path to the directory for storing the translation cache.
    :param api_id: API ID for the translation service. If None (or empty), falls back to
                   the BAIDU_API_ID / TENCENT_API_ID environment variable.
                   For Tencent, this is SecretId.
    :param api_key: API Key for the translation service. If None (or empty), falls back to
                    the BAIDU_API_KEY / TENCENT_API_KEY environment variable.
                    For Tencent, this is SecretKey.
    :param target_lang: Default target language for translations.
    :param source_lang: Default source language for translations.
    :raises ValueError: If the engine is not supported or API credentials are not found.
    """
    if engine not in ["baidu", "tencent"]:
        raise ValueError(
            f"Unsupported engine: {engine}. Supported engines are 'baidu', 'tencent'."
        )

    self.engine_name = engine
    self.default_target_lang = target_lang
    self.default_source_lang = source_lang

    # Only the environment variable names and the wording of the error
    # message differ between the two engines.
    if engine == "baidu":
        env_api_id_name = "BAIDU_API_ID"
        env_api_key_name = "BAIDU_API_KEY"
        credential_label = "Baidu API ID and API Key"
    else:
        env_api_id_name = "TENCENT_API_ID"
        env_api_key_name = "TENCENT_API_KEY"
        credential_label = "Tencent Secret ID and Secret Key"

    # Explicit arguments win; the environment is only a fallback.
    _api_id = api_id or os.getenv(env_api_id_name)
    _api_key = api_key or os.getenv(env_api_key_name)
    if not _api_id or not _api_key:
        raise ValueError(
            f"{credential_label} are required. "
            f"Provide them as arguments or set {env_api_id_name} and {env_api_key_name} environment variables."
        )

    engine_cls = BaiduTranslation if engine == "baidu" else TencentTranslation
    self.translator_instance = engine_cls(api_id=_api_id, api_key=_api_key)

    # Opened last on purpose: nothing above can fail once the cache exists.
    # NOTE(review): Cache presumably creates on-disk state at cache_path
    # as soon as it is constructed - confirm against the cache library.
    self.cache = Cache(cache_path)
clear_cache()

Clear all items from the translation cache for this translator instance.

Source code in toolkitx/lab/translator.py
def clear_cache(self) -> None:
    """
    Remove every entry from this translator's translation cache.

    Delegates to ``self.cache.clear()``; whatever that call returns is
    discarded and this method returns None.
    """
    cache = self.cache
    cache.clear()
close_cache()

Close the cache. Important to call when done if cache is not used as a context manager.

Source code in toolkitx/lab/translator.py
def close_cache(self) -> None:
    """
    Close this translator's cache.

    Call this once the translator is no longer needed, unless the cache is
    being managed by a context manager. Delegates to ``self.cache.close()``
    and returns None.
    """
    cache = self.cache
    cache.close()
translate(text, target_lang=None, source_lang=None)

Translate text using the configured engine and cache.

:param text: The text to translate.
:param target_lang: The target language code. Defaults to instance's default_target_lang.
:param source_lang: The source language code. Defaults to instance's default_source_lang.
:return: The translated text.

Source code in toolkitx/lab/translator.py
def translate(
    self, text: str, target_lang: str | None = None, source_lang: str | None = None
) -> str:
    """
    Translate text using the configured engine and cache.

    :param text: The text to translate.
    :param target_lang: The target language code. Defaults to instance's default_target_lang.
    :param source_lang: The source language code. Defaults to instance's default_source_lang.
    :return: The translated text.
    """
    _target_lang = target_lang or self.default_target_lang
    _source_lang = source_lang or self.default_source_lang

    if not text:
        return ""

    cache_key = self._create_cache_key(text, _target_lang, _source_lang)
    cached_translation = self.cache.get(cache_key)

    if cached_translation is not None:
        return cached_translation
    # print(f"Cache miss. Translating: {text[:30]}...") # For debugging
    try:
        # py-transgpt's translate method takes (text, target_language, source_language)
        translated_text = self.translator_instance.translate(
            text, _target_lang, _source_lang
        )
        if (
            translated_text
        ):  # Ensure we don't cache None or empty if translation fails silently
            self.cache.set(cache_key, translated_text)
        return translated_text
    except Exception as e:
        # Log error or handle as needed
        logger.warning(f"Error during translation with {self.engine_name}: {e}")
        # Depending on requirements, you might want to re-raise or return original text/error message
        raise  # Re-raise the exception to make the caller aware