simplification: remove the zimit user and the su wrapper, and run chrome as root with --no-sandbox

also log the exclusion regexes
This commit is contained in:
Ilya Kreymer 2020-10-16 21:02:35 +00:00
parent 5b3101f2d8
commit 2e2db2f352
3 changed files with 5 additions and 11 deletions

View File

@ -22,17 +22,10 @@ RUN pip install git+https://github.com/webrecorder/pywb@patch-work
COPY --from=chrome /usr/lib/x86_64-linux-gnu/ /usr/lib/x86_64-linux-gnu/
COPY --from=chrome /lib/x86_64-linux-gnu/libdbus* /lib/x86_64-linux-gnu/
RUN useradd zimit --shell /bin/bash --create-home \
&& usermod -a -G sudo zimit \
&& echo 'ALL ALL = (ALL) NOPASSWD: ALL' >> /etc/sudoers \
&& echo 'zimit:secret' | chpasswd
WORKDIR /app
ADD package.json /app/
RUN chown -R zimit /app
RUN yarn install
ADD config.yaml /app/

View File

@ -28,7 +28,8 @@ async function run(params) {
// Chrome Flags, including proxy server
const args = [
"--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically)
`--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`
`--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`,
"--no-sandbox"
];
// prefix for direct capture via pywb
@ -313,6 +314,8 @@ async function main() {
})
.argv;
console.log("Exclusions Regexes: ", params.exclude);
try {
await run(params);
process.exit(0);

View File

@ -96,7 +96,6 @@ def zimit(args=None):
# make temp dir for this crawl and make it all writeable+all readable+all exec
temp_root_dir = tempfile.mkdtemp(dir=zimit_args.output, prefix=".tmp")
os.chmod(temp_root_dir, stat.S_IROTH | stat.S_IWOTH | stat.S_IXOTH)
if not zimit_args.keep:
@ -133,8 +132,7 @@ def zimit(args=None):
print("")
print("----------")
print("running zimit driver: " + cmd_line)
su_cmd = ["su", "zimit", "-c", cmd_line]
subprocess.run(su_cmd, check=True) # nosec
subprocess.run(cmd_args, check=True) # nosec
warc_files = glob.glob(
os.path.join(temp_root_dir, "collections/capture/archive/*.warc.gz")