Spaces:
Sleeping
Sleeping
Commit
·
b25fb44
1
Parent(s):
a59a803
Sync with the main repo
Browse files
- apps/common/auto_zip.py +53 -0
- apps/data_explorer/data_explorer.py +16 -2
- apps/data_explorer/downloader.py +15 -1
- apps/data_explorer/loader.py +24 -35
- sync.sh +15 -0
apps/common/auto_zip.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 2 |
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
| 3 |
+
# you may not use this file except in compliance with the License.
|
| 4 |
+
# You may obtain a copy of the License at
|
| 5 |
+
#
|
| 6 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
#
|
| 8 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 9 |
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
| 10 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 11 |
+
# See the License for the specific language governing permissions and
|
| 12 |
+
# limitations under the License.
|
| 13 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 14 |
+
import json
|
| 15 |
+
import os
|
| 16 |
+
import zipfile
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class AutoZip:
    """Read-only, iterable view over the members of a zip archive.

    Only members whose file name ends with ``ext`` are exposed. Iterating
    yields each such member decoded as UTF-8 and parsed as JSON, in the
    order the members are listed in the archive.

    The instance is its own iterator: calling ``iter()`` rewinds it to the
    first member, so concurrent/nested iteration over one instance is not
    supported.

    Args:
        zip_path: Path of the zip archive to open.
        ext: File-name suffix a member must have to be included.
    """

    def __init__(self, zip_path: str, ext: str = ".json"):
        self.zip_path = zip_path
        self.zip = zipfile.ZipFile(zip_path, "r")
        self.fl = [f for f in self.zip.filelist if f.filename.endswith(ext)]
        # Initialise the cursor here so that next(obj) works even when
        # iter(obj) has not been called first (previously AttributeError).
        self.index = 0

    def __enter__(self):
        """Return self so the archive can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the archive when leaving a ``with`` block."""
        self.close()

    def close(self) -> None:
        """Close the underlying zip archive and release its file handle."""
        self.zip.close()

    def __next__(self):
        """Return the next matching member parsed as JSON.

        Raises:
            StopIteration: When every matching member has been returned.
        """
        if self.index >= len(self.fl):
            raise StopIteration
        finfo = self.fl[self.index]
        with self.zip.open(finfo) as f:
            raw_json = json.loads(f.read().decode("utf-8"))
        # Advance only after a successful read and parse.
        self.index += 1
        return raw_json

    def __len__(self) -> int:
        """Return the number of members matching the extension filter."""
        return len(self.fl)

    def __iter__(self):
        """Rewind to the first matching member and return self."""
        self.index = 0
        return self

    def as_dict(self, include_zip_name: bool = False) -> dict:
        """Return the raw UTF-8 text of every matching member.

        Args:
            include_zip_name: When true, each key is prefixed with the
                archive's base file name and a ``/``.

        Returns:
            Mapping from member name (optionally prefixed) to the member's
            decoded, unparsed text.
        """
        prefix = ""
        if include_zip_name:
            prefix = os.path.basename(self.zip_path) + "/"
        d = dict()
        for finfo in self.fl:
            with self.zip.open(finfo) as f:
                d[prefix + finfo.filename] = f.read().decode("utf-8")
        return d
|
apps/data_explorer/data_explorer.py
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Gradio-based web UI to explore the Camel dataset.
|
| 3 |
"""
|
|
@@ -39,7 +52,8 @@ def parse_arguments():
|
|
| 39 |
return args
|
| 40 |
|
| 41 |
|
| 42 |
-
def construct_ui(blocks, datasets: Datasets,
|
|
|
|
| 43 |
""" Build Gradio UI and populate with chat data from JSONs.
|
| 44 |
|
| 45 |
Args:
|
|
@@ -213,7 +227,7 @@ def construct_ui(blocks, datasets: Datasets, default_dataset: str = None):
|
|
| 213 |
Returns:
|
| 214 |
List[Tuple]: Chat history in chatbot UI element format.
|
| 215 |
"""
|
| 216 |
-
history = []
|
| 217 |
curr_qa = (None, None)
|
| 218 |
for k in sorted(messages.keys()):
|
| 219 |
msg = messages[k]
|
|
|
|
| 1 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 2 |
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
| 3 |
+
# you may not use this file except in compliance with the License.
|
| 4 |
+
# You may obtain a copy of the License at
|
| 5 |
+
#
|
| 6 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
#
|
| 8 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 9 |
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
| 10 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 11 |
+
# See the License for the specific language governing permissions and
|
| 12 |
+
# limitations under the License.
|
| 13 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 14 |
"""
|
| 15 |
Gradio-based web UI to explore the Camel dataset.
|
| 16 |
"""
|
|
|
|
| 52 |
return args
|
| 53 |
|
| 54 |
|
| 55 |
+
def construct_ui(blocks, datasets: Datasets,
|
| 56 |
+
default_dataset: Optional[str] = None):
|
| 57 |
""" Build Gradio UI and populate with chat data from JSONs.
|
| 58 |
|
| 59 |
Args:
|
|
|
|
| 227 |
Returns:
|
| 228 |
List[Tuple]: Chat history in chatbot UI element format.
|
| 229 |
"""
|
| 230 |
+
history: List[Tuple] = []
|
| 231 |
curr_qa = (None, None)
|
| 232 |
for k in sorted(messages.keys()):
|
| 233 |
msg = messages[k]
|
apps/data_explorer/downloader.py
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import urllib.request
|
| 3 |
|
| 4 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 5 |
|
| 6 |
REPO_ROOT = os.path.realpath(
|
| 7 |
os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
|
|
@@ -23,7 +37,7 @@ def download_data():
|
|
| 23 |
hf_hub_download(repo_id="camel-ai/code", repo_type="dataset",
|
| 24 |
filename="code_chat.zip", local_dir=data_dir,
|
| 25 |
local_dir_use_symlinks=False)
|
| 26 |
-
except:
|
| 27 |
for name in ("ai_society_chat.zip", "code_chat.zip"):
|
| 28 |
data_url = ("https://storage.googleapis.com/"
|
| 29 |
f"camel-bucket/datasets/private/{name}")
|
|
|
|
| 1 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 2 |
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
| 3 |
+
# you may not use this file except in compliance with the License.
|
| 4 |
+
# You may obtain a copy of the License at
|
| 5 |
+
#
|
| 6 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
#
|
| 8 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 9 |
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
| 10 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 11 |
+
# See the License for the specific language governing permissions and
|
| 12 |
+
# limitations under the License.
|
| 13 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 14 |
import os
|
| 15 |
import urllib.request
|
| 16 |
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
+
from huggingface_hub.utils._errors import RepositoryNotFoundError
|
| 19 |
|
| 20 |
REPO_ROOT = os.path.realpath(
|
| 21 |
os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
|
|
|
|
| 37 |
hf_hub_download(repo_id="camel-ai/code", repo_type="dataset",
|
| 38 |
filename="code_chat.zip", local_dir=data_dir,
|
| 39 |
local_dir_use_symlinks=False)
|
| 40 |
+
except RepositoryNotFoundError:
|
| 41 |
for name in ("ai_society_chat.zip", "code_chat.zip"):
|
| 42 |
data_url = ("https://storage.googleapis.com/"
|
| 43 |
f"camel-bucket/datasets/private/{name}")
|
apps/data_explorer/loader.py
CHANGED
|
@@ -1,16 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Everything related to parsing the data JSONs into UI-compatible format.
|
| 3 |
"""
|
| 4 |
|
| 5 |
import glob
|
| 6 |
-
import json
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
-
import
|
| 10 |
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
| 11 |
|
| 12 |
from tqdm import tqdm
|
| 13 |
|
|
|
|
|
|
|
| 14 |
ChatHistory = Dict[str, Any]
|
| 15 |
ParsedChatHistory = Dict[str, Any]
|
| 16 |
AllChats = Dict[str, Any]
|
|
@@ -20,30 +33,6 @@ REPO_ROOT = os.path.realpath(
|
|
| 20 |
os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
|
| 21 |
|
| 22 |
|
| 23 |
-
class AutoZip:
|
| 24 |
-
def __init__(self, zip_path: str, ext: str = ".json"):
|
| 25 |
-
self.zip_path = zip_path
|
| 26 |
-
self.zip = zipfile.ZipFile(zip_path, "r")
|
| 27 |
-
self.fl = [f for f in self.zip.filelist if f.filename.endswith(ext)]
|
| 28 |
-
|
| 29 |
-
def __next__(self):
|
| 30 |
-
if self.index >= len(self.fl):
|
| 31 |
-
raise StopIteration
|
| 32 |
-
else:
|
| 33 |
-
finfo = self.fl[self.index]
|
| 34 |
-
with self.zip.open(finfo) as f:
|
| 35 |
-
raw_json = json.loads(f.read().decode("utf-8"))
|
| 36 |
-
self.index += 1
|
| 37 |
-
return raw_json
|
| 38 |
-
|
| 39 |
-
def __len__(self):
|
| 40 |
-
return len(self.fl)
|
| 41 |
-
|
| 42 |
-
def __iter__(self):
|
| 43 |
-
self.index = 0
|
| 44 |
-
return self
|
| 45 |
-
|
| 46 |
-
|
| 47 |
def parse(raw_chat: ChatHistory) -> Union[ParsedChatHistory, None]:
|
| 48 |
""" Gets the JSON raw chat data, validates it and transforms
|
| 49 |
into an easy to work with form.
|
|
@@ -122,17 +111,17 @@ def load_zip(zip_path: str) -> AllChats:
|
|
| 122 |
continue
|
| 123 |
parsed_list.append(parsed)
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
for parsed in parsed_list:
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
assistant_roles = list(sorted(
|
| 131 |
-
user_roles = list(sorted(
|
| 132 |
-
matrix: Dict[Tuple[str, str],
|
| 133 |
for parsed in parsed_list:
|
| 134 |
key = (parsed['assistant_role'], parsed['user_role'])
|
| 135 |
-
original_task = parsed['original_task']
|
| 136 |
new_item = {
|
| 137 |
k: v
|
| 138 |
for k, v in parsed.items()
|
|
|
|
| 1 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 2 |
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
| 3 |
+
# you may not use this file except in compliance with the License.
|
| 4 |
+
# You may obtain a copy of the License at
|
| 5 |
+
#
|
| 6 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 7 |
+
#
|
| 8 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 9 |
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
| 10 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 11 |
+
# See the License for the specific language governing permissions and
|
| 12 |
+
# limitations under the License.
|
| 13 |
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
| 14 |
"""
|
| 15 |
Everything related to parsing the data JSONs into UI-compatible format.
|
| 16 |
"""
|
| 17 |
|
| 18 |
import glob
|
|
|
|
| 19 |
import os
|
| 20 |
import re
|
| 21 |
+
from typing import Any, Dict, Optional, Tuple, Union
|
|
|
|
| 22 |
|
| 23 |
from tqdm import tqdm
|
| 24 |
|
| 25 |
+
from apps.common.auto_zip import AutoZip
|
| 26 |
+
|
| 27 |
ChatHistory = Dict[str, Any]
|
| 28 |
ParsedChatHistory = Dict[str, Any]
|
| 29 |
AllChats = Dict[str, Any]
|
|
|
|
| 33 |
os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
def parse(raw_chat: ChatHistory) -> Union[ParsedChatHistory, None]:
|
| 37 |
""" Gets the JSON raw chat data, validates it and transforms
|
| 38 |
into an easy to work with form.
|
|
|
|
| 111 |
continue
|
| 112 |
parsed_list.append(parsed)
|
| 113 |
|
| 114 |
+
assistant_roles_set = set()
|
| 115 |
+
user_roles_set = set()
|
| 116 |
for parsed in parsed_list:
|
| 117 |
+
assistant_roles_set.add(parsed['assistant_role'])
|
| 118 |
+
user_roles_set.add(parsed['user_role'])
|
| 119 |
+
assistant_roles = list(sorted(assistant_roles_set))
|
| 120 |
+
user_roles = list(sorted(user_roles_set))
|
| 121 |
+
matrix: Dict[Tuple[str, str], Dict[str, Dict]] = dict()
|
| 122 |
for parsed in parsed_list:
|
| 123 |
key = (parsed['assistant_role'], parsed['user_role'])
|
| 124 |
+
original_task: str = parsed['original_task']
|
| 125 |
new_item = {
|
| 126 |
k: v
|
| 127 |
for k, v in parsed.items()
|
sync.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Sync the data-explorer app sources from the main CAMEL repository
# (branch hf_spaces_2) into this Hugging Face Space checkout.
#
# Run from the root of the Space repository: the current directory is the
# rsync destination.

# Abort on the first failing command or unset variable, so that a failed
# clone/cd can never lead to copying from (or deleting) the wrong place.
# pipefail is deliberately NOT enabled: `grep -v` exits 1 when it filters
# out every line, which is not an error here.
set -eu

# Fresh, unique scratch directory; a stale directory left by an earlier
# aborted run would otherwise make `git clone` fail.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/camel_hf_tmp.XXXXXX")
echo "$TMP_DIR"
HF_REPO_DIR=$(realpath .)
echo "$HF_REPO_DIR"

# Always remove the scratch clone, including when a step below fails.
trap 'rm -rf "$TMP_DIR"' EXIT

git clone -b hf_spaces_2 https://github.com/lightaime/camel.git "$TMP_DIR"
cd "$TMP_DIR"

# Copy every non-test Python file, keeping its relative path (rsync -R).
# -I {} already runs one command per input line, so -n 1 is not needed
# (GNU xargs treats the two options as mutually exclusive).
find apps/data_explorer -name "*.py" | grep -v test | xargs -I {} rsync -R {} "$HF_REPO_DIR"
find apps/common -name "*.py" | grep -v test | xargs -I {} rsync -R {} "$HF_REPO_DIR"

echo Done
|