First_agent_template_25

Sleeping

App Files Files Community

Alaaeldin committed on Mar 11

Commit

13b6e34

verified ·

1 Parent(s): 5ee2dd5

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -56

app.py CHANGED Viewed

@@ -2,8 +2,11 @@ from smolagents import CodeAgent, tool
 import datetime
 import pytz
 import yaml
-import requests
-import json
 from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
@@ -65,67 +68,196 @@ def get_current_time_in_timezone(timezone: str) -> str:
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
-# Weather Forecast Tool
 @tool
-def weather_forecast(location: str) -> str:
-    """Fetches weather forecast for a specified location.
     Args:
-        location: The location to get weather forecast for (city name or coordinates).
     """
     try:
-        # Connect to a public weather API
-        api_url = f"https://wttr.in/{location}?format=j1"
-        # Make the API request
-        response = requests.get(api_url, timeout=10)
-        response.raise_for_status()  # Raise an exception for HTTP errors
-        # Parse the JSON response
-        weather_data = response.json()
-        # Extract relevant information
-        current_condition = weather_data.get("current_condition", [{}])[0]
-        weather_desc = current_condition.get("weatherDesc", [{}])[0].get("value", "Unknown")
-        temp_c = current_condition.get("temp_C", "Unknown")
-        temp_f = current_condition.get("temp_F", "Unknown")
-        feels_like_c = current_condition.get("FeelsLikeC", "Unknown")
-        humidity = current_condition.get("humidity", "Unknown")
-        wind_speed = current_condition.get("windspeedKmph", "Unknown")
-        wind_dir = current_condition.get("winddir16Point", "Unknown")
-        # Get forecast for upcoming days
-        forecast = weather_data.get("weather", [])
-        forecast_info = ""
-        if forecast:
-            forecast_info = "\n\nForecast for the next few days:\n"
-            for day in forecast[:3]:  # Limit to 3 days
-                date = day.get("date", "Unknown")
-                max_temp_c = day.get("maxtempC", "Unknown")
-                min_temp_c = day.get("mintempC", "Unknown")
-                desc = day.get("hourly", [{}])[0].get("weatherDesc", [{}])[0].get("value", "Unknown")
-                forecast_info += f"- {date}: {desc}, Max: {max_temp_c}°C, Min: {min_temp_c}°C\n"
-        # Format the response
-        weather_report = f"""
-Weather for {location}:
-Current Conditions: {weather_desc}
-Temperature: {temp_c}°C / {temp_f}°F (Feels like: {feels_like_c}°C)
-Humidity: {humidity}%
-Wind: {wind_speed} km/h, Direction: {wind_dir}
-{forecast_info}
-"""
-        return weather_report.strip()
-    except requests.exceptions.RequestException as e:
-        return f"Error fetching weather for {location}: Connection error - {str(e)}"
-    except json.JSONDecodeError:
-        return f"Error fetching weather for {location}: Invalid response from weather service"
     except Exception as e:
-        return f"Error fetching weather for {location}: {str(e)}"
 # Set up the agent with our tools
 final_answer = FinalAnswerTool()
@@ -142,10 +274,10 @@ model = HfApiModel(
     custom_role_conversions=None,
 )
-# Create agent with our tools (now 4 including final_answer)
 agent = CodeAgent(
     model=model,
-    tools=[text_analyzer, get_current_time_in_timezone, weather_forecast, final_answer],
     max_steps=6,
     verbosity_level=1,
     grammar=None,

 import datetime
 import pytz
 import yaml
+import os
+import re
+import numpy as np
+from typing import List, Optional, Dict, Any
+import io
 from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
     except Exception as e:
         return f"Error fetching time for timezone '{timezone}': {str(e)}"
# Simple hashed bag-of-words embedding (fixed size, comparable across texts)
def get_embedding(text: str, normalize: bool = True, dim: int = 1024) -> np.ndarray:
    """Create a simple hashed bag-of-words embedding for the text.

    Every word is mapped to one of ``dim`` buckets with a stable hash (CRC32),
    so vectors built from different texts have the same length and the same
    word-to-index mapping and can be compared directly, e.g. with cosine
    similarity.  Distinct words may occasionally share a bucket.

    Args:
        text: The text to embed.  It is lowercased and split into word tokens.
        normalize: If True, scale the vector to unit L2 norm.  An all-zero
            vector (no words found) is returned unchanged.
        dim: Number of buckets, i.e. the length of the returned vector.

    Returns:
        A 1-D float array of length ``dim`` holding the (optionally
        normalized) word counts.

    Raises:
        ValueError: If ``dim`` is smaller than 1.
    """
    # Local import keeps the function usable without touching file-level imports.
    import zlib

    if dim < 1:
        raise ValueError("dim must be a positive integer")

    # Lowercase and tokenize
    words = re.findall(r'\b\w+\b', text.lower())

    # A per-text vocabulary would give every text its own vector length and
    # index meaning; hashing into a fixed number of buckets keeps all
    # embeddings in one shared space.
    vector = np.zeros(dim)
    for word in words:
        vector[zlib.crc32(word.encode("utf-8")) % dim] += 1

    # Normalize if requested (skip the all-zero vector to avoid dividing by 0)
    if normalize:
        norm = np.linalg.norm(vector)
        if norm > 0:
            vector = vector / norm
    return vector
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Calculate cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector; must have the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a Python float,
        or 0.0 when either vector has zero length (norm).

    Raises:
        ValueError: Propagated from ``np.dot`` when the shapes do not match.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    # Test the norm rather than the sum of the entries: a vector such as
    # [1, -1] sums to zero without being a zero vector.
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return float(np.dot(a, b) / (norm_a * norm_b))
def extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
    """Extract text from PDF bytes.

    Args:
        pdf_bytes: Raw content of a PDF document.

    Returns:
        The text of all pages, each page followed by a newline.  Failures are
        reported in-band rather than raised: a message starting with
        "PDF processing requires" when PyPDF2 is not installed, or one
        starting with "Error extracting text from PDF" for any other failure.
    """
    try:
        # PyPDF2 is optional; import lazily so the app still starts without it.
        try:
            import PyPDF2
        except ImportError:
            return "PDF processing requires PyPDF2 library which is not available."
        with io.BytesIO(pdf_bytes) as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` guards against a page for which no text comes back, so
            # one empty page cannot fail the whole document.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages, each page followed by a newline.  Failures are
        reported in-band rather than raised: a message starting with
        "PDF processing requires" when PyPDF2 is not installed, or one
        starting with "Error extracting text from PDF" for any other failure
        (including a missing or unreadable file).
    """
    try:
        # PyPDF2 is optional; import lazily so the app still starts without it.
        try:
            import PyPDF2
        except ImportError:
            return "PDF processing requires PyPDF2 library which is not available."
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` guards against a page for which no text comes back, so
            # one empty page cannot fail the whole document.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
 @tool
def semantic_search(corpus: str, query: str, top_k: int = 3, file_path: Optional[str] = None) -> str:
    """Performs semantic search on a corpus of text or uploaded PDF.

    Args:
        corpus: The text corpus to search within (could be a large text or list of documents).
            If empty and file_path is provided, will extract text from the PDF.
        query: The search query.
        top_k: Number of top results to return.
        file_path: Optional path to a PDF file to extract text from.
    """
    # Local imports keep the tool body self-contained.
    import math
    from collections import Counter

    def _term_counts(text):
        # Lowercased word frequencies of a piece of text.
        return Counter(re.findall(r'\b\w+\b', text.lower()))

    def _length(counts):
        # Euclidean norm of a sparse word-count vector.
        return math.sqrt(sum(value * value for value in counts.values()))

    try:
        if top_k < 1:
            return "Error: top_k must be a positive integer."

        final_corpus = corpus
        # Fall back to the file only when no inline corpus was given
        if not corpus and file_path:
            if not os.path.exists(file_path):
                return f"File not found: {file_path}"
            if file_path.lower().endswith('.pdf'):
                pdf_text = extract_text_from_pdf(file_path)
                # The PDF helper reports failures in-band via these prefixes.
                if pdf_text.startswith(("Error", "PDF processing requires")):
                    return pdf_text
                final_corpus = pdf_text
            else:
                # If not PDF, try to read as text
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        final_corpus = f.read()
                except Exception as e:
                    return f"Error reading file: {str(e)}"
        if not final_corpus:
            return "Error: No text corpus provided for search."

        # Split corpus into sentence-like chunks; very short fragments are dropped.
        # This is a simple approach - a real system would use smarter chunking.
        pieces = (piece.strip() for piece in re.split(r'(?<=[.!?])\s+', final_corpus))
        chunks = [piece for piece in pieces if len(piece) > 10]
        if not chunks:
            return "No valid text chunks found in the corpus."

        # Score every chunk against the query with cosine similarity over
        # word counts.  Counts are keyed by the word itself, so query and
        # chunk always live in the same space (vectors built from separate
        # per-text vocabularies cannot be compared).
        query_counts = _term_counts(query)
        query_length = _length(query_counts)
        results = []
        for chunk in chunks:
            chunk_counts = _term_counts(chunk)
            chunk_length = _length(chunk_counts)
            if query_length == 0 or chunk_length == 0:
                similarity = 0.0
            else:
                overlap = sum(count * chunk_counts[word] for word, count in query_counts.items())
                similarity = overlap / (query_length * chunk_length)
            results.append((chunk, similarity))

        # Sort by similarity score (descending); ties keep corpus order
        results.sort(key=lambda item: item[1], reverse=True)

        # Format results
        parts = [f"Search results for: '{query}'\n\n"]
        for rank, (chunk, score) in enumerate(results[:top_k], start=1):
            # Truncate long chunks for display
            display_chunk = chunk if len(chunk) <= 200 else chunk[:197] + "..."
            parts.append(f"{rank}. [Score: {score:.2f}] {display_chunk}\n\n")
        return "".join(parts)
    except Exception as e:
        return f"Error performing semantic search: {str(e)}"
+@tool
def list_available_tools() -> str:
    """Lists all available tools and provides usage examples for each."""
    # The help text is static Markdown, assembled from one string per section
    # (intro, one section per tool, closing usage notes).  Each section ends
    # with a newline so plain concatenation yields the full document.
    intro = """
# Available Tools
This agent has the following tools available:
"""
    text_analyzer_help = """## 1. Text Analyzer
Analyzes text and provides statistics including word count, character count, unique words count, average word length, and most common words.
**Example usage:**
- "Analyze this text: The quick brown fox jumps over the lazy dog."
- "Give me statistics about this paragraph: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
"""
    timezone_help = """## 2. Current Time in Timezone
Fetches the current local time for a specified timezone.
**Example usage:**
- "What time is it in Tokyo?"
- "Get the current time in America/New_York"
- "Tell me the time in UTC"
"""
    search_help = """## 3. Semantic Search
Performs semantic search on a corpus of text or uploaded PDF document to find the most relevant sections matching a query.
**Example usage:**
- "Search for 'climate change' in this text: Global warming is the long-term heating of Earth's surface observed since the pre-industrial period due to human activities, primarily fossil fuel burning, which increases heat-trapping greenhouse gas levels in Earth's atmosphere."
- "If I have uploaded a PDF file called 'research.pdf', search for 'vaccination' in it"
- "Find information about 'neural networks' in this text: [your long text here]"
"""
    usage_notes = """## How to Use This Agent
1. Type your request in the chat box below
2. The agent will process your request and use the appropriate tool
3. Results will be displayed in this conversation area
For complex tasks, you may need to provide additional context or data. Be as specific as possible in your requests.
"""
    return "".join((intro, text_analyzer_help, timezone_help, search_help, usage_notes))
 # Set up the agent with our tools
 final_answer = FinalAnswerTool()
     custom_role_conversions=None,
 )
+# Create agent with our tools (including the new list_available_tools)
 agent = CodeAgent(
     model=model,
+    tools=[text_analyzer, get_current_time_in_timezone, semantic_search, list_available_tools, final_answer],
     max_steps=6,
     verbosity_level=1,
     grammar=None,