Skip to content

Commit 1f7e533

Browse files
authored
Merge pull request #1031 from ScrapeGraphAI/posthog-proxy-implement
added posthog proxy
2 parents f038ca1 + e230856 commit 1f7e533

File tree

1 file changed

+34
-86
lines changed

1 file changed

+34
-86
lines changed
Lines changed: 34 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,3 @@
1-
"""
2-
This module contains code that relates to sending ScrapeGraphAI usage telemetry.
3-
4-
To disable sending telemetry there are three ways:
5-
6-
1. Set it to false programmatically in your driver:
7-
>>> from scrapegraphai import telemetry
8-
>>> telemetry.disable_telemetry()
9-
2. Set it to `false` in ~/.scrapegraphai.conf under `DEFAULT`
10-
[DEFAULT]
11-
telemetry_enabled = False
12-
3. Set SCRAPEGRAPHAI_TELEMETRY_ENABLED=false as an environment variable:
13-
SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python run.py
14-
or:
15-
export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false
16-
"""
17-
181
import configparser
192
import functools
203
import importlib.metadata
@@ -27,17 +10,19 @@
2710
from typing import Any, Callable, Dict
from urllib import request
2912

13+
# Resolve the installed package version once, at import time.
try:
    VERSION = importlib.metadata.version("scrapegraphai")
except importlib.metadata.PackageNotFoundError:
    # Without installed distribution metadata (e.g. a bare source checkout)
    # the lookup raises; telemetry labelling must not make the import fail.
    VERSION = "unknown"

# `VERSION` is already a dotted string. Joining its characters with "."
# (the previous behaviour) turned "1.2.3" into "1...2...3".
STR_VERSION = VERSION

# Telemetry events are posted to this proxy endpoint instead of PostHog.
PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"

# Timeout, in seconds, passed to `urlopen` when sending an event.
TIMEOUT = 2
# Per-user configuration file consulted for the `telemetry_enabled` option.
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")

logger = logging.getLogger(__name__)
3924

40-
25+
# Everything below remains mostly the same.
4126
def _load_config(config_location: str) -> configparser.ConfigParser:
4227
config = configparser.ConfigParser()
4328
try:
@@ -59,28 +44,22 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
5944
return config
6045

6146

62-
def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj):
    """Resolve the telemetry flag from the config object and the environment.

    Sources are applied in increasing priority: ``default_value``, then the
    ``telemetry_enabled`` option of the ``DEFAULT`` config section, then the
    ``SCRAPEGRAPHAI_TELEMETRY_ENABLED`` environment variable. A value that
    cannot be parsed as a boolean is ignored (and reported at debug level),
    leaving the lower-priority result in place.

    Args:
        default_value: Flag to use when no source supplies a valid value.
        config_obj: Loaded configuration; must support ``["DEFAULT"]`` lookup
            and ``getboolean`` (e.g. ``configparser.ConfigParser``).

    Returns:
        The resolved telemetry flag.
    """
    telemetry_enabled = default_value

    if "telemetry_enabled" in config_obj["DEFAULT"]:
        try:
            telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
        except ValueError as e:
            logger.debug(
                f"Unable to parse value for `telemetry_enabled` from config. Encountered {e}"
            )

    env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
    if env_value is not None:
        # Parse the environment value itself. Re-reading the config option
        # here would silently ignore the variable and make the documented
        # opt-out (SCRAPEGRAPHAI_TELEMETRY_ENABLED=false) ineffective.
        parsed = configparser.ConfigParser.BOOLEAN_STATES.get(env_value.strip().lower())
        if parsed is None:
            logger.debug(
                "Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` "
                f"from environment. Encountered {env_value!r}"
            )
        else:
            telemetry_enabled = parsed

    return telemetry_enabled
8564

8665

@@ -90,87 +69,70 @@ def _check_config_and_environ_for_telemetry_flag(
9069
# Running count of enabled telemetry checks, and the ceiling after which
# is_telemetry_enabled() starts returning False.
CALL_COUNTER = 0
MAX_COUNT_SESSION = 1000

# Properties merged into every event built by log_event().
BASE_PROPERTIES = dict(
    os_type=os.name,
    os_version=platform.platform(),
    python_version=f"{platform.python_version()}/{platform.python_implementation()}",
    distinct_id=g_anonymous_id,
    scrapegraphai_version=VERSION,
    telemetry_version="0.0.4-proxy",
)
10181

10282

10383
def disable_telemetry():
    """Switch telemetry off by clearing the module-level enabled flag."""
    global g_telemetry_enabled
    g_telemetry_enabled = False
10986

11087

11188
def is_telemetry_enabled() -> bool:
    """Report whether a telemetry event may be sent right now.

    Returns ``False`` when telemetry is switched off. Otherwise each call
    increments ``CALL_COUNTER`` and the result is ``True`` only while the
    counter has not exceeded ``MAX_COUNT_SESSION``.
    """
    global CALL_COUNTER

    if not g_telemetry_enabled:
        return False

    CALL_COUNTER += 1
    return CALL_COUNTER <= MAX_COUNT_SESSION
12996

13097

98+
# ⭐ UPDATED FOR PROXY — send without API key
13199
def _send_event_json(event_json: dict):
    """POST a single telemetry event to ``PROXY_URL`` without ever raising.

    The event is JSON-encoded and sent with a ``TIMEOUT`` second limit. Any
    failure (serialization, network, or a response code other than 200) is
    caught and reported at debug level only.

    Args:
        event_json: The event payload to serialize and send.
    """
    request_headers = {
        "Content-Type": "application/json",
        "User-Agent": f"scrapegraphai/{STR_VERSION}",
    }
    try:
        body = json.dumps(event_json).encode()
        outgoing = request.Request(PROXY_URL, data=body, headers=request_headers)

        with request.urlopen(outgoing, timeout=TIMEOUT) as response:
            reply = response.read()
            if response.code != 200:
                # Routed through the handler below so it is only logged.
                raise RuntimeError(reply)
    except Exception as e:
        logger.debug(f"Failed to send telemetry data to proxy: {e}")
    else:
        logger.debug(f"Telemetry payload forwarded to proxy: {body}")
148116

149117

150118
def send_event_json(event_json: dict):
    """Send a telemetry event on a background thread.

    Args:
        event_json: The event payload handed to ``_send_event_json``.

    Raises:
        RuntimeError: If telemetry is currently disabled.
    """
    if not g_telemetry_enabled:
        raise RuntimeError("Telemetry tracking is disabled!")

    try:
        # The HTTP call runs on its own thread so the caller is not blocked.
        threading.Thread(target=_send_event_json, args=(event_json,)).start()
    except Exception as e:
        logger.debug(f"Telemetry dispatch thread failed: {e}")
161126

162127

163128
def log_event(event: str, properties: Dict[str, Any]):
    """Build and dispatch a telemetry event if telemetry is currently allowed.

    Does nothing when ``is_telemetry_enabled()`` returns ``False``.

    Args:
        event: Name of the event.
        properties: Event-specific properties. They are merged over
            ``BASE_PROPERTIES``, so a key present in both takes the value
            given here.
    """
    if is_telemetry_enabled():
        payload = {
            "event": event,
            "distinct_id": g_anonymous_id,
            "properties": {**BASE_PROPERTIES, **properties},
        }
        send_event_json(payload)
174136

175137

176138
def log_graph_execution(
@@ -188,10 +150,7 @@ def log_graph_execution(
188150
exception: str = None,
189151
total_tokens: int = None,
190152
):
191-
"""
192-
function for logging the graph execution
193-
"""
194-
properties = {
153+
props = {
195154
"graph_name": graph_name,
196155
"source": source,
197156
"prompt": prompt,
@@ -207,26 +166,15 @@ def log_graph_execution(
207166
"total_tokens": total_tokens,
208167
"type": "community-library",
209168
}
210-
log_event("graph_execution", properties)
169+
log_event("graph_execution", props)
211170

212171

213172
def capture_function_usage(call_fn: Callable) -> Callable:
    """Decorate ``call_fn`` so each call reports a ``function_usage`` event.

    The wrapped function's return value or exception is passed through
    unchanged. Telemetry failures are logged at debug level and never reach
    the caller.

    Args:
        call_fn: The function to wrap.

    Returns:
        A wrapper with the same metadata as ``call_fn`` (via ``functools.wraps``).
    """

    @functools.wraps(call_fn)
    def wrapped_fn(*args, **kwargs):
        try:
            return call_fn(*args, **kwargs)
        finally:
            # An exception escaping a ``finally`` clause replaces the wrapped
            # call's result or error, so telemetry problems are contained here.
            try:
                if is_telemetry_enabled():
                    log_event("function_usage", {"function_name": call_fn.__name__})
            except Exception as e:
                logger.debug(
                    f"Failed to send telemetry for function usage. Encountered: {e}"
                )

    return wrapped_fn

0 commit comments

Comments
 (0)