rajvaishnavi455 committed on
Commit
d40b9f1
·
verified ·
1 Parent(s): d7ebbef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -180
app.py CHANGED
@@ -1,180 +1,186 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
- import pandas as pd
4
- from sentence_transformers import SentenceTransformer
5
- from langchain.vectorstores import FAISS
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- import gradio as gr
8
-
9
- # Step 1: Scrape data from Analytics Vidhya's free courses page
10
- def scrape_courses():
11
- page_url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
12
- response = requests.get(page_url)
13
- response.raise_for_status()
14
-
15
- html_parser = BeautifulSoup(response.content, 'html.parser')
16
- course_data = []
17
-
18
- # Extract course details
19
- for section in html_parser.find_all('div', class_='course-cards__container'):
20
- section_heading = section.find('h3', class_='section__heading')
21
- category_name = section_heading.get_text(strip=True) if section_heading else "Miscellaneous"
22
-
23
- for card in section.find_all('a', class_='course-card'):
24
- course_title = card.find('h3')
25
- course_image = card.find('img', class_='course-card__img')
26
-
27
- if course_title and course_image:
28
- title = course_title.get_text(strip=True)
29
- image_url = course_image['src']
30
- link = card['href']
31
-
32
- # Ensure full URL
33
- if not link.startswith('http'):
34
- link = 'https://courses.analyticsvidhya.com' + link
35
-
36
- course_data.append({
37
- 'course_name': title,
38
- 'course_category': category_name,
39
- 'thumbnail': image_url,
40
- 'course_url': link
41
- })
42
- return pd.DataFrame(course_data)
43
-
44
- # Step 2: Generate embeddings and create FAISS index using HuggingFace embeddings
45
- def create_faiss_index(course_texts):
46
- try:
47
- # Load HuggingFace Embedding model
48
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
49
-
50
- # Add course metadata (index) for each course
51
- metadata = [{"index": idx} for idx in range(len(course_texts))]
52
-
53
- # Create FAISS vector store with metadata
54
- vector_store = FAISS.from_texts(course_texts, embeddings, metadatas=metadata)
55
- print("FAISS index created successfully!")
56
- return vector_store
57
- except Exception as e:
58
- print(f"Error during FAISS index creation: {str(e)}")
59
- return None
60
-
61
- # Step 3: Define search function
62
- def search_courses(user_query):
63
- if vector_store is None:
64
- return '<p class="error">Error: FAISS index not initialized. Please check the embedding setup.</p>'
65
- try:
66
- # Perform similarity search
67
- results = vector_store.similarity_search(user_query, k=10)
68
- if results:
69
- output_html = '<div class="search-results">'
70
- for result in results:
71
- course_idx = result.metadata['index'] # Access the index from metadata
72
- course = course_df.iloc[course_idx]
73
- output_html += f'''
74
- <div class="course-box">
75
- <img src="{course['thumbnail']}" alt="{course['course_name']}" class="course-thumbnail"/>
76
- <div class="course-details">
77
- <h3>{course['course_name']}</h3>
78
- <p><strong>Category:</strong> {course['course_category']}</p>
79
- <a href="{course['course_url']}" target="_blank" class="view-course">Explore Course</a>
80
- </div>
81
- </div>'''
82
- output_html += '</div>'
83
- return output_html
84
- else:
85
- return '<p class="no-matches">No matching courses found. Please refine your query.</p>'
86
- except Exception as e:
87
- return f'<p class="error">Error during search: {str(e)}</p>'
88
-
89
- # Step 4: Initialize scraping, FAISS index, and Gradio interface
90
- course_df = scrape_courses()
91
- course_texts = course_df['course_name'].tolist() # Texts for embeddings
92
- vector_store = create_faiss_index(course_texts)
93
-
94
- # Custom CSS for Gradio interface
95
- custom_style = """
96
- body {
97
- font-family: 'Roboto', sans-serif;
98
- background-color: #f7f9fc;
99
- margin: 0;
100
- padding: 0;
101
- }
102
- .search-results {
103
- display: flex;
104
- flex-wrap: wrap;
105
- gap: 15px;
106
- justify-content: center;
107
- }
108
- .course-box {
109
- background-color: #fff;
110
- border: 1px solid #e3e3e3;
111
- border-radius: 8px;
112
- overflow: hidden;
113
- width: 300px;
114
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
115
- transition: transform 0.3s;
116
- }
117
- .course-box:hover {
118
- transform: translateY(-5px);
119
- }
120
- .course-thumbnail {
121
- width: 100%;
122
- height: 150px;
123
- object-fit: cover;
124
- }
125
- .course-details {
126
- padding: 15px;
127
- }
128
- .course-details h3 {
129
- margin: 0 0 10px;
130
- font-size: 18px;
131
- color: #333;
132
- }
133
- .course-details p {
134
- margin: 0 0 15px;
135
- font-size: 14px;
136
- color: #555;
137
- }
138
- .view-course {
139
- display: inline-block;
140
- padding: 10px 20px;
141
- background-color: #007bff;
142
- color: #fff;
143
- text-decoration: none;
144
- border-radius: 5px;
145
- font-size: 14px;
146
- }
147
- .view-course:hover {
148
- background-color: #0056b3;
149
- }
150
- .no-matches {
151
- text-align: center;
152
- font-size: 16px;
153
- color: #666;
154
- margin: 20px 0;
155
- }
156
- .error {
157
- color: red;
158
- font-size: 16px;
159
- text-align: center;
160
- margin: 20px;
161
- }
162
- """
163
-
164
- # Gradio Interface
165
- tool_interface = gr.Interface(
166
- fn=search_courses,
167
- inputs=gr.Textbox(label="Search for Free Courses", placeholder="Type keywords like 'Data Science' or 'Python'"),
168
- outputs=gr.HTML(label="Search Results"),
169
- title="Find Free Courses",
170
- description="Quickly find free courses avalable on Analytics Vidhya using this tool.",
171
- css=custom_style,
172
- examples=[
173
- ["Generative AI"],
174
- ["Business Analytics"],
175
- ["Python Programming"]
176
- ]
177
- )
178
-
179
- if __name__ == "__main__":
180
- tool_interface.launch()
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ import gradio as gr
7
+
8
+ # Step 1: Scrape data from Analytics Vidhya's free courses page
9
def scrape_courses():
    """Scrape the Analytics Vidhya free-courses page into a DataFrame.

    Fetches the listing page, walks every ``course-cards__container``
    section and collects one row per ``course-card`` anchor that has a
    title (``<h3>``), a thumbnail (``<img class="course-card__img">``) and
    an ``href``.  A section without a ``section__heading`` is filed under
    the category "Miscellaneous".

    Returns:
        pandas.DataFrame: one row per course with the columns
        ``course_name``, ``course_category``, ``thumbnail`` and
        ``course_url``.  The columns exist even when no course was found,
        so callers can always index them.

    Raises:
        requests.RequestException: if the page cannot be fetched (network
            failure, timeout, or a non-2xx status).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    page_url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
    columns = ['course_name', 'course_category', 'thumbnail', 'course_url']

    # Without a timeout a stalled server would hang start-up forever.
    response = requests.get(page_url, timeout=30)
    response.raise_for_status()

    html_parser = BeautifulSoup(response.content, 'html.parser')
    course_data = []

    for section in html_parser.find_all('div', class_='course-cards__container'):
        section_heading = section.find('h3', class_='section__heading')
        category_name = (
            section_heading.get_text(strip=True) if section_heading else "Miscellaneous"
        )

        for card in section.find_all('a', class_='course-card'):
            course_title = card.find('h3')
            course_image = card.find('img', class_='course-card__img')
            href = card.get('href')

            # A card with no title, image or link cannot be rendered usefully.
            if not (course_title and course_image and href):
                continue

            # .get() tolerates an <img> without src instead of raising KeyError.
            image_src = course_image.get('src', '')

            course_data.append({
                'course_name': course_title.get_text(strip=True),
                'course_category': category_name,
                # urljoin leaves absolute URLs untouched and resolves
                # relative ones the way a browser would.
                'thumbnail': urljoin(page_url, image_src) if image_src else '',
                'course_url': urljoin(page_url, href),
            })

    # Explicit columns keep the schema stable when nothing was scraped.
    return pd.DataFrame(course_data, columns=columns)
42
+
43
+ # Step 2: Generate embeddings and create FAISS index using HuggingFace embeddings
44
def create_faiss_index(course_texts):
    """Embed the given texts and load them into a FAISS vector store.

    Every text is stored alongside an ``{"index": i}`` metadata entry that
    records its position in ``course_texts``.

    Args:
        course_texts: the strings to embed.

    Returns:
        The FAISS vector store, or ``None`` when any step raises; the error
        is printed instead of being propagated.
    """
    try:
        # Embedding model used for both indexing and querying.
        embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

        # One metadata dict per text, carrying the text's position.
        positions = [dict(index=pos) for pos in range(len(course_texts))]

        store = FAISS.from_texts(course_texts, embedder, metadatas=positions)
        print("FAISS index created successfully!")
        return store
    except Exception as exc:
        print(f"Error during FAISS index creation: {str(exc)}")
        return None
59
+
60
+ # Step 3: Define search function
61
def search_courses(user_query):
    """Return an HTML fragment listing the courses most similar to a query.

    Reads the module-level ``vector_store`` and ``course_df``.  Each hit's
    ``metadata['index']`` is used as a row position in ``course_df``.

    Args:
        user_query: free-text search string typed by the user.

    Returns:
        str: a ``search-results`` grid with one card per hit (at most 10),
        a ``no-matches`` paragraph when the query is blank or nothing is
        found, or an ``error`` paragraph when the index is missing or the
        search raises.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if vector_store is None:
        return '<p class="error">Error: FAISS index not initialized. Please check the embedding setup.</p>'

    no_matches = '<p class="no-matches">No matching courses found. Please refine your query.</p>'

    # A blank query would otherwise be embedded and return arbitrary courses.
    if not (user_query or "").strip():
        return no_matches

    try:
        results = vector_store.similarity_search(user_query, k=10)
        if not results:
            return no_matches

        cards = []
        for result in results:
            course = course_df.iloc[result.metadata['index']]
            # The values were scraped from a third-party page, so escape
            # them before placing them in markup or attribute values.
            name = escape(str(course['course_name']))
            category = escape(str(course['course_category']))
            thumbnail = escape(str(course['thumbnail']))
            url = escape(str(course['course_url']))
            cards.append(f'''
            <div class="course-box">
                <img src="{thumbnail}" alt="{name}" class="course-thumbnail"/>
                <div class="course-details">
                    <h3>{name}</h3>
                    <p><strong>Category:</strong> {category}</p>
                    <a href="{url}" target="_blank" class="view-course">Explore Course</a>
                </div>
            </div>''')
        return '<div class="search-results">' + ''.join(cards) + '</div>'
    except Exception as e:
        # Exception text may contain markup-significant characters too.
        return f'<p class="error">Error during search: {escape(str(e))}</p>'
87
+
88
+ # Step 4: Create a simpler FAISS search for Gradio interface
89
def search(query):
    """Return the top three matches for ``query`` as plain text.

    Reads the module-level ``vector_store``.

    Args:
        query: free-text search string.

    Returns:
        str: one ``"Result N: <text>"`` line per hit, joined with newlines
        (empty when there are no hits), or an error message when the index
        was never built.
    """
    if vector_store is None:
        return "Error: FAISS index not initialized. Please check the embedding setup."

    results = vector_store.similarity_search(query, k=3)

    # Hits are Document objects, not dicts: the indexed text is exposed as
    # the ``page_content`` attribute, so ``result['text']`` would raise.
    return "\n".join(
        f"Result {rank}: {doc.page_content}"
        for rank, doc in enumerate(results, start=1)
    )
94
+
95
+ # Step 5: Initialize scraping, FAISS index, and Gradio interface
96
# Scrape once at start-up; the resulting table backs every search.
course_df = scrape_courses()

# A scrape that finds no cards can yield a DataFrame without a
# 'course_name' column; fall back to an empty list instead of a KeyError.
course_texts = (
    course_df['course_name'].tolist() if 'course_name' in course_df.columns else []
)

if course_texts:
    vector_store = create_faiss_index(course_texts)
else:
    # With nothing to index, leave the store unset; search_courses already
    # reports a missing index to the user.
    print("No courses were scraped; search is disabled.")
    vector_store = None
99
+
100
# Custom CSS handed to gr.Interface via its ``css`` argument.  The class
# names below (.search-results, .course-box, .course-thumbnail,
# .course-details, .view-course, .no-matches, .error) are the ones emitted
# in the HTML built by search_courses.
custom_style = """
body {
font-family: 'Roboto', sans-serif;
background-color: #f7f9fc;
margin: 0;
padding: 0;
}
.search-results {
display: flex;
flex-wrap: wrap;
gap: 15px;
justify-content: center;
}
.course-box {
background-color: #fff;
border: 1px solid #e3e3e3;
border-radius: 8px;
overflow: hidden;
width: 300px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
transition: transform 0.3s;
}
.course-box:hover {
transform: translateY(-5px);
}
.course-thumbnail {
width: 100%;
height: 150px;
object-fit: cover;
}
.course-details {
padding: 15px;
}
.course-details h3 {
margin: 0 0 10px;
font-size: 18px;
color: #333;
}
.course-details p {
margin: 0 0 15px;
font-size: 14px;
color: #555;
}
.view-course {
display: inline-block;
padding: 10px 20px;
background-color: #007bff;
color: #fff;
text-decoration: none;
border-radius: 5px;
font-size: 14px;
}
.view-course:hover {
background-color: #0056b3;
}
.no-matches {
text-align: center;
font-size: 16px;
color: #666;
margin: 20px 0;
}
.error {
color: red;
font-size: 16px;
text-align: center;
margin: 20px;
}
"""
169
+
170
# Gradio interface: one text box in, one HTML panel out, backed by
# search_courses and styled with custom_style.
tool_interface = gr.Interface(
    fn=search_courses,
    inputs=gr.Textbox(label="Search for Free Courses", placeholder="Type keywords like 'Data Science' or 'Python'"),
    outputs=gr.HTML(label="Search Results"),
    title="Find Free Courses",
    description="Quickly find free courses available on Analytics Vidhya using this tool.",
    css=custom_style,
    examples=[
        ["Generative AI"],
        ["Business Analytics"],
        ["Python Programming"],
    ],
)

# Start the web server only when run as a script, so that importing this
# module (e.g. from a test or another tool) does not block inside launch().
if __name__ == "__main__":
    tool_interface.launch()