dwb2023 committed on
Commit
56dbc6b
·
verified ·
1 Parent(s): fb7f43d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -13,10 +13,7 @@ if not hf_token:
13
  if not hf_user:
14
  raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
15
 
16
- # Perform login using the token
17
- # login(token=hf_token, add_to_git_credential=True)
18
-
19
- SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini"]
20
 
21
  def validate_url(url):
22
  return url.startswith('https://')
@@ -24,7 +21,6 @@ def validate_url(url):
24
  def clone_repo(url, repo_dir, hf_token, hf_user):
25
  env = os.environ.copy()
26
  env['GIT_LFS_SKIP_SMUDGE'] = '1'
27
- # Construct the Git URL with the token and author name for authentication
28
  token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
29
  result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
30
  if result.returncode != 0:
@@ -37,11 +33,16 @@ def get_file_summary(file_path, file_type):
37
  "name": os.path.relpath(file_path),
38
  "type": file_type,
39
  "size": size,
 
 
40
  }
41
 
42
- def read_file_content(file_path):
43
  with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
44
- return file.read()
 
 
 
45
 
46
  def validate_file_types(directory):
47
  m = Magika()
@@ -88,7 +89,6 @@ def extract_repo_content(url, hf_token, hf_user):
88
 
89
  extracted_content.append(content)
90
 
91
- # Cleanup temporary directory
92
  subprocess.run(["rm", "-rf", repo_dir])
93
 
94
  return extracted_content
@@ -100,6 +100,8 @@ def format_output(extracted_content, repo_url):
100
  formatted_output += f"### File: {file_data['header']['name']}\n"
101
  formatted_output += f"**Type:** {file_data['header']['type']}\n"
102
  formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
 
 
103
  formatted_output += "#### Content:\n"
104
  formatted_output += f"```\n{file_data['content']}\n```\n\n"
105
  else:
@@ -130,4 +132,4 @@ with app:
130
 
131
  extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
132
 
133
- app.launch()
 
13
  if not hf_user:
14
  raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
15
 
16
+ SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini", "jsonl", "ipynb"]
 
 
 
17
 
18
  def validate_url(url):
19
  return url.startswith('https://')
 
21
  def clone_repo(url, repo_dir, hf_token, hf_user):
22
  env = os.environ.copy()
23
  env['GIT_LFS_SKIP_SMUDGE'] = '1'
 
24
  token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
25
  result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
26
  if result.returncode != 0:
 
33
  "name": os.path.relpath(file_path),
34
  "type": file_type,
35
  "size": size,
36
+ "creation_date": os.path.getctime(file_path),
37
+ "modification_date": os.path.getmtime(file_path)
38
  }
39
 
40
+ def read_file_content(file_path, max_size=32*1024):
41
  with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
42
+ if os.path.getsize(file_path) > max_size:
43
+ return file.read(max_size) + "\n... [Content Truncated] ..."
44
+ else:
45
+ return file.read()
46
 
47
  def validate_file_types(directory):
48
  m = Magika()
 
89
 
90
  extracted_content.append(content)
91
 
 
92
  subprocess.run(["rm", "-rf", repo_dir])
93
 
94
  return extracted_content
 
100
  formatted_output += f"### File: {file_data['header']['name']}\n"
101
  formatted_output += f"**Type:** {file_data['header']['type']}\n"
102
  formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
103
+ formatted_output += f"**Created:** {file_data['header']['creation_date']}\n"
104
+ formatted_output += f"**Modified:** {file_data['header']['modification_date']}\n"
105
  formatted_output += "#### Content:\n"
106
  formatted_output += f"```\n{file_data['content']}\n```\n\n"
107
  else:
 
132
 
133
  extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
134
 
135
+ app.launch()