lhoestq HF staff committed on
Commit
9fc6ad5
·
verified ·
1 Parent(s): c8dd83d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -9,11 +9,14 @@ def greet(SQL_Query):
9
  return con.sql(SQL_Query).df()
10
 
11
  examples = [
12
- "SELECT dump, avg(token_count) FROM\n(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\nGROUP BY dump;",
13
- "SELECT dump, max(token_count) FROM\n(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\nGROUP BY dump;",
14
- "SELECT dump, min(token_count) FROM\n(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\nGROUP BY dump;",
 
 
 
15
  ]
16
  css = "#component-4{display: block;}"
17
- description = "Run SQL queries on the FineWeb dataset"
18
  demo = gr.Interface(fn=greet, inputs="text", outputs="dataframe", examples=examples, cache_examples=False, description=description, css=css)
19
  demo.launch()
 
9
  return con.sql(SQL_Query).df()
10
 
11
  examples = [
12
+ "SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10;",
13
+ "SELECT text, language_score FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' WHERE language_score > 0.97 LIMIT 10;",
14
+ "SELECT text, language_score FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' WHERE language_score < 0.67 LIMIT 10;",
15
+ "SELECT dump, min(language_score), avg(language_score), max(language_score) FROM\n(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\nGROUP BY dump;",
16
+ "SELECT text, language_score FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' WHERE text SIMILAR TO '([A-Z ]){4,}.*' LIMIT 10;",
17
+ "SELECT dump, min(token_count), avg(token_count), max(token_count) FROM\n(SELECT * FROM 'hf://datasets/HuggingFaceFW/fineweb/sample/10BT/*.parquet' LIMIT 10000)\nGROUP BY dump;",
18
  ]
19
  css = "#component-4{display: block;}"
20
+ description = "Run SQL queries on the HuggingFaceFW/fineweb dataset"
21
  demo = gr.Interface(fn=greet, inputs="text", outputs="dataframe", examples=examples, cache_examples=False, description=description, css=css)
22
  demo.launch()