headers = {\"Authorization\": \"Bearer token\"}\n
def query(payload):
response = requests.post(API_URL, headers=headers, json=payload)
return response.json()
output = query({
\"inputs\": \"Can you please let us know more details about your \",
})
\nprint(output)
\n","updatedAt":"2023-04-21T22:13:30.142Z","author":{"_id":"61d089b203bc10eb8e1c296d","avatarUrl":"/avatars/6d109e0168f333c06f2729c292eb3149.svg","fullname":"Tristo","name":"Tristo","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false}},"numEdits":0,"editors":["Tristo"],"editorAvatarUrls":["/avatars/6d109e0168f333c06f2729c292eb3149.svg"],"reactions":[],"isReport":false}}],"pinned":false,"locked":false,"collection":"discussions","isPullRequest":false,"isReport":false},"repo":{"name":"bigscience/bloom","type":"model"},"activeTab":"discussion","discussionRole":0,"watched":false,"muted":false,"repoDiscussionsLocked":false}">Choosing sampling or greedy.
headers = {\"Authorization\": \"Bearer token\"}\n
def query(payload):
response = requests.post(API_URL, headers=headers, json=payload)
return response.json()
output = query({
\"inputs\": \"Can you please let us know more details about your \",
})
\nprint(output)
\n","updatedAt":"2023-04-21T22:13:30.142Z","author":{"_id":"61d089b203bc10eb8e1c296d","avatarUrl":"/avatars/6d109e0168f333c06f2729c292eb3149.svg","fullname":"Tristo","name":"Tristo","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false}},"numEdits":0,"editors":["Tristo"],"editorAvatarUrls":["/avatars/6d109e0168f333c06f2729c292eb3149.svg"],"reactions":[],"isReport":false}}],"pinned":false,"locked":false,"collection":"discussions","isPullRequest":false,"isReport":false},"primaryEmailConfirmed":false,"repo":{"name":"bigscience/bloom","type":"model"},"discussionRole":0,"acceptLanguages":["*"],"hideComments":true,"repoDiscussionsLocked":false,"isDiscussionAuthor":false}">On the bigscience/bloom model page on Hugging Face, there is an option to choose between sampling and greedy decoding. When we use the API, it automatically uses greedy (I assume). How do I switch that to sampling?
import requests
# Endpoint that query() posts to: the hosted inference URL for bigscience/bloom.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"

# HTTP headers sent with every request.
# NOTE(review): "token" looks like a placeholder for a real access token --
# confirm and substitute before running.
headers = dict(Authorization="Bearer token")
def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": "..."}``.
            NOTE(review): the hosted Inference API documents an optional
            ``"parameters"`` object for text generation (for example
            ``{"do_sample": True}``) that is sent alongside ``"inputs"`` --
            confirm the exact keys against the current API reference.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        requests.exceptions.Timeout: if the server does not answer in time.
        ValueError: if the response body is not valid JSON
            (raised by ``response.json()``).
    """
    # An explicit timeout keeps a stalled connection from blocking forever;
    # requests applies no timeout by default.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
# Send one prompt through query() and print whatever it returns.
output = query(
    {"inputs": "Can you please let us know more details about your "}
)
print(output)