def __init__(self,
             browser_type: str = "chrome",
             browser_path: Optional[str] = None,
             user_data_path: Optional[str] = None,
             headless: bool = False,
             proxy: Optional[str] = None,
             debugging_port: int = 9222):
    """Record the launch configuration; no browser is started here.

    Args:
        browser_type: Browser family name used to pick default paths and
            command-line flags (e.g. "chrome", "firefox", "edge", "brave").
        browser_path: Explicit path to the browser executable. When falsy,
            a platform default is resolved at launch time.
        user_data_path: Explicit profile / user-data directory. When falsy,
            a platform default is resolved at launch time.
        headless: Whether the headless flag is added to the command line.
        proxy: Proxy server value forwarded on the command line, if given.
        debugging_port: TCP port for the remote-debugging endpoint.
            Defaults to 9222, the value that used to be hard-coded.
    """
    self.browser_type = browser_type
    self.browser_path = browser_path
    self.user_data_path = user_data_path
    self.headless = headless
    self.proxy = proxy
    self.debugging_port = debugging_port
    # Handle of the launched browser process; stays None until one is spawned.
    self.browser_process = None
    # Placeholder for a temporary profile directory; nothing visible in this
    # part of the file assigns it.
    self.temp_dir = None
def check_port_open(self, port):
    """Return True when a TCP connection to localhost:``port`` succeeds.

    Args:
        port: TCP port number to probe on the local machine.

    Returns:
        True if ``connect_ex`` reports success (0), False otherwise.
    """
    # The context manager closes the probe socket on every path; previously
    # the socket was left open until garbage collection.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # Bound the wait so a connect attempt that gets no answer cannot
        # stall the caller indefinitely.
        probe.settimeout(1.0)
        return probe.connect_ex(('localhost', port)) == 0
def start(self, startup_timeout: float = 10.0) -> str:
    """Launch the browser if needed and return the CDP endpoint URL.

    The command line comes from ``self._get_browser_args()``, which also
    resolves the default executable and user-data paths. If something is
    already accepting connections on ``self.debugging_port``, no process
    is launched and the URL of that existing endpoint is returned.

    Args:
        startup_timeout: Maximum number of seconds to wait for a newly
            launched browser to open the debugging port. When the time
            runs out the URL is still returned, as it was after the
            former fixed one-second sleep.

    Returns:
        ``http://localhost:<debugging_port>``.

    Raises:
        Exception: Wraps any error raised while probing the port or
            spawning the process; ``self.cleanup()`` is called first.
    """
    args = self._get_browser_args()
    try:
        # Only spawn a browser when nothing is listening on the debug port.
        if not self.check_port_open(self.debugging_port):
            # Output is discarded rather than piped: nothing here reads the
            # pipes, and a full pipe buffer would block the browser.
            self.browser_process = subprocess.Popen(
                args,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Poll for readiness instead of sleeping a fixed interval.
            deadline = time.monotonic() + startup_timeout
            while time.monotonic() < deadline:
                if self.check_port_open(self.debugging_port):
                    break
                time.sleep(0.1)
        return f"http://localhost:{self.debugging_port}"
    except Exception as e:
        # cleanup() is expected to be defined elsewhere in the class.
        self.cleanup()
        raise Exception(f"启动浏览器失败: {e}") from e
def _get_default_browser_path(self):
    """Set ``self.browser_path`` to the platform's default executable.

    The lookup is keyed by ``sys.platform`` and ``self.browser_type``.
    Windows entries contain ``%VAR%`` placeholders and are passed through
    ``os.path.expandvars``; macOS entries are absolute paths; on every
    other platform bare command names are used.

    Raises:
        NotImplementedError: If the browser type has no entry for the
            current platform.
    """
    on_windows = sys.platform == "win32"
    if sys.platform == "darwin":  # macOS application bundles
        known = {
            'chrome': "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
            'firefox': "/Applications/Firefox.app/Contents/MacOS/firefox",
        }
    elif on_windows:
        known = {
            'chrome': '%PROGRAMFILES%\\Google\\Chrome\\Application\\chrome.exe',
            'firefox': '%PROGRAMFILES%\\Mozilla Firefox\\firefox.exe',
            'edge': '%PROGRAMFILES(X86)%\\Microsoft\\Edge\\Application\\msedge.exe',
            'brave': '%PROGRAMFILES%\\BraveSoftware\\Brave-Browser\\Application\\brave.exe',
        }
    else:  # everything else is treated as Linux
        known = {
            'chrome': "google-chrome",
            'firefox': "firefox",
        }

    if self.browser_type not in known:
        raise NotImplementedError(f"Browser type {self.browser_type} not supported")

    location = known[self.browser_type]
    self.browser_path = os.path.expandvars(location) if on_windows else location
def _get_default_user_data_path(self):
    """Set ``self.user_data_path`` to the platform's default profile root.

    The lookup is keyed by ``sys.platform`` and ``self.browser_type``.
    Windows entries are expanded with ``os.path.expandvars``; macOS and
    Linux entries start with ``~`` and are expanded with
    ``os.path.expanduser``.

    Fixes over the previous version:
      * macOS used a Windows ``%LOCALAPPDATA%`` path for every browser.
      * Linux used ``%HOME%``, which ``os.path.expandvars`` does not expand
        on POSIX, so a literal ``%HOME%/...`` directory was produced.
      * Linux Chrome pointed at the ``Default`` profile sub-directory
        instead of the user-data root used by the Windows entries.

    Raises:
        NotImplementedError: If the browser type has no entry for the
            current platform.
    """
    if sys.platform == "darwin":  # macOS
        expand = os.path.expanduser
        defaults = {
            'chrome': '~/Library/Application Support/Google/Chrome',
            'firefox': '~/Library/Application Support/Firefox/Profiles',
            'edge': '~/Library/Application Support/Microsoft Edge',
            'brave': '~/Library/Application Support/BraveSoftware/Brave-Browser',
        }
    elif sys.platform == "win32":  # Windows
        expand = os.path.expandvars
        defaults = {
            'chrome': '%LOCALAPPDATA%\\Google\\Chrome\\User Data',
            'firefox': '%LOCALAPPDATA%\\Mozilla\\Firefox\\Profiles',
            'edge': '%LOCALAPPDATA%\\Microsoft\\Edge\\User Data',
            'brave': '%LOCALAPPDATA%\\BraveSoftware\\Brave-Browser\\User Data',
        }
    else:  # everything else is treated as Linux
        expand = os.path.expanduser
        defaults = {
            'chrome': '~/.config/google-chrome',
            'firefox': '~/.mozilla/firefox',
            'edge': '~/.config/microsoft-edge',
            'brave': '~/.config/BraveSoftware/Brave-Browser',
        }

    try:
        template = defaults[self.browser_type]
    except (KeyError, TypeError):
        raise NotImplementedError(
            f"Browser type {self.browser_type} not supported"
        ) from None
    self.user_data_path = expand(template)
def _get_browser_args(self) -> List[str]:
    """Assemble the argv list used to launch the configured browser.

    Missing ``browser_path`` / ``user_data_path`` values are filled in by
    the default-path helpers, and the user-data directory is created when
    it does not exist yet. Chromium-family browsers (chrome, edge, brave)
    and Firefox use different flag spellings for the debugging port, the
    profile directory and headless mode.

    Returns:
        The executable path followed by its command-line arguments.

    Raises:
        NotImplementedError: If ``self.browser_type`` is not one of
            chrome, edge, brave or firefox.
    """
    # Resolve defaults first; both helpers store their result on self.
    if not self.browser_path:
        self._get_default_browser_path()
    if not self.user_data_path:
        self._get_default_user_data_path()

    profile_dir = self.user_data_path
    if not os.path.exists(profile_dir):
        os.makedirs(profile_dir)

    kind = self.browser_type
    if kind == "firefox":
        command = [
            self.browser_path,
            "--remote-debugging-port", str(self.debugging_port),
            "--profile", profile_dir,
        ]
        headless_flag = "--headless"
    elif kind in ('chrome', 'edge', 'brave'):
        command = [
            self.browser_path,
            f"--remote-debugging-port={self.debugging_port}",
            f"--user-data-dir={profile_dir}",
        ]
        headless_flag = "--headless=new"
    else:
        raise NotImplementedError(f"Browser type {self.browser_type} not supported")

    if self.headless:
        command.append(headless_flag)
    if self.proxy:
        # NOTE(review): --proxy-server is a Chromium switch; confirm that
        # Firefox actually honours it before relying on it there.
        command.append(f"--proxy-server={self.proxy}")
    return command
启动后会返回一个本地 URL,目前只适用于 Windows 环境。
打开浏览器后,可以用 playwright、selenium 等自动化工具连接该浏览器:
webdriver.Chrome.connect_over_cdp("localhost", 9222)
playwright.firefox.connect_over_cdp("http://localhost:9222")