Update api_usage.py
Browse files

api_usage.py  (+57 -21)  CHANGED
@@ -16,13 +16,13 @@ GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4o",
 
 TOKEN_LIMIT_PER_TIER_TURBO = {
     "free": 40000,
-    "tier-1":
+    "tier-1": 200000,
     "tier-1(old?)": 90000,
-    "tier-2":
-    "tier-3":
-    "tier-4":
-    "tier-5-old":
-    "tier-5":
+    "tier-2/tier-5-old": 2000000,
+    "tier-3": 4000000,
+    "tier-4": 10000000,
+    "tier-5-old": 15000000,
+    "tier-5": 50000000
 }
 TOKEN_LIMIT_PER_TIER_GPT4 = {
     "tier-1": 10000,
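The revised table pins a tokens-per-minute cap to each billing tier, and the checker compares these caps against the TPM a key actually reports. A minimal sketch of that lookup, assuming an exact match against the table (the guess_turbo_tier helper is illustrative, not part of this commit):

# Illustrative only: map an observed TPM limit back to a tier name.
TOKEN_LIMIT_PER_TIER_TURBO = {
    "free": 40000,
    "tier-1": 200000,
    "tier-1(old?)": 90000,
    "tier-2/tier-5-old": 2000000,
    "tier-3": 4000000,
    "tier-4": 10000000,
    "tier-5-old": 15000000,
    "tier-5": 50000000,
}

def guess_turbo_tier(tpm_num: int) -> str:
    for tier, limit in TOKEN_LIMIT_PER_TIER_TURBO.items():
        if limit == tpm_num:
            return tier
    return "unknown"

Here guess_turbo_tier(2000000) would return "tier-2/tier-5-old".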
@@ -61,12 +61,18 @@ def get_subscription(key, session, org_list):
     list_models_avai = set()
 
     for org_in in org_list:
-        …
+        if len(org_list) < 2: # mismatch_organization
+            headers = get_headers(key)
+            available_models = get_models(session, key)
+        else:
+            headers = get_headers(key, org_in['id'])
+            available_models = get_models(session, key, org_in['id'])
         if org_in['id']:
             if org_in['is_default']:
                 default_org = org_in['name']
             org_description.append(f"{org_in['description']} (Created: {datetime.utcfromtimestamp(org_in['created'])} UTC" + (", personal)" if org_in['personal'] else ")"))
-        …
+        if 'No perm' in available_models:
+            available_models.extend(GPT_TYPES)
         has_gpt4_32k = True if GPT_TYPES[2] in available_models else False
         has_gpt4_32k_0314 = True if GPT_TYPES[3] in available_models else False
         has_gpt4 = True if GPT_TYPES[1] in available_models else False
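With a single organization the probe now sends no org header; with several, it pins each request to org_in['id']. A sketch of what get_headers plausibly returns, inferred from its two call sites (the body shown here is an assumption, not the repo's actual helper):

# Assumed shape of get_headers, inferred from the call sites above.
def get_headers(key, org_id=None):
    headers = {"Authorization": f"Bearer {key}"}
    if org_id:
        # Documented OpenAI header that pins a request to one organization.
        headers["OpenAI-Organization"] = org_id
    return headers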
@@ -76,18 +82,47 @@
                 org.append(f"{org_in['id']} ({org_in['name']}, {org_in['title']}, {org_in['role']})")
             if has_gpt4_32k:
                 list_models_avai.update(GPT_TYPES)
-                status_formated = format_status([GPT_TYPES[2], GPT_TYPES[4], GPT_TYPES[5], GPT_TYPES[1], GPT_TYPES[0]], session, headers)
+                if 'No perm' in available_models:
+                    status_formated = format_status(GPT_TYPES, session, headers)
+                else:
+                    status_formated = format_status([GPT_TYPES[2], GPT_TYPES[4], GPT_TYPES[5], GPT_TYPES[1], GPT_TYPES[0]], session, headers)
                 rpm.append(status_formated[0])
                 tpm.append(status_formated[1])
                 quota.append(status_formated[2])
-                list_models.append(f"gpt-4-32k, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
+                if 'No perm' in available_models:
+                    lst_string = ""
+                    length = len(status_formated[3])
+                    count = 1
+                    for k, v in status_formated[3].items():
+                        if v:
+                            if count < length:
+                                lst_string += f'{k}, '
+                                continue
+                            else:
+                                lst_string += f' {k} '
+                        if v == False:
+                            list_models_avai.remove(k)
+                            if k == GPT_TYPES[2]:
+                                has_gpt4_32k = False
+                            elif k == GPT_TYPES[1]:
+                                has_gpt4 = False
+                            elif k == GPT_TYPES[0]:
+                                has_35 = False
+                            elif k == GPT_TYPES[4]:
+                                has_4o = False
+                        count += 1
+                    lst_string += '(No get model permission)'
+                    #list_models.append(f"gpt-4-32k, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo (No get model permission)")
+                    list_models.append(lst_string)
+                else:
+                    list_models.append(f"gpt-4-32k, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
             else:
                 list_models_avai.update([GPT_TYPES[3], GPT_TYPES[1], GPT_TYPES[0]])
                 status_formated = format_status([GPT_TYPES[3], GPT_TYPES[4], GPT_TYPES[5], GPT_TYPES[1], GPT_TYPES[0]], session, headers)
                 rpm.append(status_formated[0])
                 tpm.append(status_formated[1])
                 quota.append(status_formated[2])
-                list_models.append(f"gpt-4-32k-0314, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
+                list_models.append(f"gpt-4-32k-0314, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
 
         elif has_gpt4:
             if org_in['id']:
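For keys that cannot list models, the new branch walks status_formated[3] (a model-to-bool availability map) and builds the display string by hand. A simplified, self-contained equivalent of that assembly (not the committed loop, which additionally clears the has_* flags and prunes list_models_avai):

# Simplified stand-in for the lst_string loop above.
model_status = {"gpt-4-32k": False, "gpt-4o": True, "gpt-4": True, "gpt-3.5-turbo": True}
working = [model for model, ok in model_status.items() if ok]
print(", ".join(working) + " (No get model permission)")
# -> gpt-4o, gpt-4, gpt-3.5-turbo (No get model permission)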
@@ -123,6 +158,7 @@ def send_oai_completions(oai_stuff):
     session = oai_stuff[0]
     headers = oai_stuff[1]
     model = oai_stuff[2]
+    model_status = False
     try:
         req_body = {"model": model, "max_tokens": 1}
         rpm_string = ""
@@ -134,6 +170,8 @@ def send_oai_completions(oai_stuff):
         e = result.get("error", {}).get("code", "")
         if e == None or e == 'missing_required_parameter':
             rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
+            if rpm_num > 0:
+                model_status = True
             tpm_num = int(r.headers.get('x-ratelimit-limit-tokens', 0))
             tpm_left = int(r.headers.get('x-ratelimit-remaining-tokens', 0))
             _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
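model_status now flips to True only when the completions endpoint reports a positive request limit for the probed model. A toy illustration of the header arithmetic, using a fabricated response object in place of r:

# Fabricated response; real values come from the completions response headers.
class FakeResponse:
    headers = {
        "x-ratelimit-limit-requests": "5000",
        "x-ratelimit-limit-tokens": "2000000",
        "x-ratelimit-remaining-tokens": "1999999",
    }

r = FakeResponse()
rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
model_status = rpm_num > 0  # the new availability signal
tpm_num = int(r.headers.get("x-ratelimit-limit-tokens", 0))
print('{:,}'.format(tpm_num).replace(',', ' '))  # -> 2 000 000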
@@ -160,20 +198,22 @@ def send_oai_completions(oai_stuff):
             rpm_string = f"0 ({model})"
             tpm_string = f"0 ({model})"
             quota_string = e
-        return rpm_string, tpm_string, quota_string
+        return rpm_string, tpm_string, quota_string, model, model_status
     except Exception as e:
         #print(e)
-        return "", "", ""
+        return "", "", "", model, model_status
 
 def format_status(list_models_avai, session, headers):
     rpm = []
     tpm = []
+    model_status = {}
     quota = ""
     args = [(session, headers, model) for model in list_models_avai]
     with concurrent.futures.ThreadPoolExecutor() as executer:
         for result in executer.map(send_oai_completions, args):
             rpm.append(result[0])
             tpm.append(result[1])
+            model_status[result[3]] = result[4]
             if result[2]:
                 if quota == 'yes | custom-tier':
                     continue
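send_oai_completions now returns a 5-tuple, and format_status folds the trailing (model, status) pair into a dict. A minimal sketch of that fan-out, with a stub worker standing in for the real request:

import concurrent.futures

# Stub worker mirroring the widened 5-tuple contract of send_oai_completions.
def probe(args):
    model = args[2]
    return f"10 000 ({model})", f"2 000 000 ({model})", "", model, True

args = [(None, None, model) for model in ["gpt-3.5-turbo", "gpt-4"]]
model_status = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() yields results in input order, so they line up with the models.
    for result in executor.map(probe, args):
        model_status[result[3]] = result[4]
print(model_status)  # {'gpt-3.5-turbo': True, 'gpt-4': True}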
@@ -184,7 +224,7 @@ def format_status(list_models_avai, session, headers):
     for i in range(len(rpm)):
         rpm_str += rpm[i] + (" | " if i < len(rpm)-1 else "")
         tpm_str += tpm[i] + (" | " if i < len(rpm)-1 else "")
-    return rpm_str, tpm_str, quota
+    return rpm_str, tpm_str, quota, model_status
 
 def check_key_tier(rpm, tpm, dict, headers):
     dictItemsCount = len(dict)
@@ -216,6 +256,8 @@ def get_models(session, key, org: str = None):
     try:
         rq = session.get(f"{BASE_URL}/models", headers=headers, timeout=10)
         avai_models = rq.json()
+        if rq.status_code == 403:
+            return ['No perm']
         list_models = [model["id"] for model in avai_models["data"]] #[model["id"] for model in avai_models["data"] if model["id"] in GPT_TYPES]
     except:
         list_models = []
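The ['No perm'] sentinel distinguishes a key that is blocked from listing models (HTTP 403) from a request that failed outright (empty list); get_subscription reacts by assuming every entry in GPT_TYPES and probing each one. A hedged sketch of the caller-side contract:

# Hypothetical caller logic for the sentinel introduced above.
def candidate_models(listed, gpt_types):
    if listed == ['No perm']:
        return list(gpt_types)  # cannot enumerate, so probe every known type
    if listed:
        return [m for m in listed if m in gpt_types]
    return []  # request failed outright

print(candidate_models(['No perm'], ["gpt-4", "gpt-3.5-turbo"]))  # both types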
@@ -265,7 +307,6 @@ async def check_ant_rate_limit(key):
         tasks = [fetch_ant(async_session, json_data) for _ in range(max_requests)]
         results = await asyncio.gather(*tasks)
         count = 0
-        #print(results)
         for result in results:
             if result:
                 count+=1
@@ -273,7 +314,6 @@
             return f'{max_requests} or above'
         return count
     except Exception as e:
-        #print(e)
         return 0
 
 def check_ant_tier(rpm):
@@ -337,7 +377,6 @@ def check_key_gemini_availability(key):
         else:
             return False, None
     except Exception as e:
-        #print(e)
         return 'Error while making request.', None
 
 def check_key_azure_availability(endpoint, api_key):
@@ -356,7 +395,6 @@ def check_key_azure_availability(endpoint, api_key):
         models = [m["id"] for m in rq["data"] if len(m["capabilities"]["scale_types"])>0]
         return True, models
     except Exception as e:
-        #print(e)
         return False, None
 
 def get_azure_deploy(endpoint, api_key):
@@ -516,7 +554,6 @@ async def check_key_aws_availability(key):
     iam = session.client('iam')
 
     username = check_username(session)
-    #print(username)
     if not username[0]:
         return False, username[1]
 
@@ -578,7 +615,6 @@ def is_model_working(form_info, model_info):
             return model_info['agreementAvailability']['errorMessage']
         return "No"
     except:
-        #print(form_status)
         return "No"
 
 async def get_model_status(session, key, secret, region, model_name, form_info):
@@ -621,7 +657,7 @@ async def check_bedrock_claude_status(session, key, secret):
             if region and model_name:
                 if msg == "Maybe":
                     invoke_info = await send_signed_request_bedrock(session, payload, f"anthropic.{model_name}", key, secret, region)
-                    if 'messages.0' in invoke_info.get('message'):
+                    if 'messages.0' in invoke_info.get('message') or 'many requests' in invoke_info.get('message'):
                         models[model_name].append(f'{region}: may be Unavailable if disabled')
                     else:
                         models[model_name].append(region)
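The widened condition treats a throttling reply ('many requests') like a validation error ('messages.0'): either one proves the model endpoint answered, so the region is recorded as only possibly unavailable. A small sketch of the classification with fabricated invoke_info payloads (note the `or ''` guard, an addition here: the committed line would raise if the 'message' key were missing):

# Fabricated payloads illustrating the two branches above.
def classify(invoke_info, region):
    message = invoke_info.get('message') or ''
    if 'messages.0' in message or 'many requests' in message:
        return f'{region}: may be Unavailable if disabled'
    return region

print(classify({'message': 'Too many requests, please wait'}, 'us-east-1'))
print(classify({'message': 'model access granted'}, 'us-west-2'))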