|
Home > Archive > Unix Programming > February 2005 > Per thread cpu usage
You are viewing an archived Text-only version of the thread.
To view this thread in its original format and/or if you want to reply to
this thread please [click here]
| Author |
Per thread cpu usage
|
|
| Christian Panten 2005-02-20, 6:19 pm |
| Hi,
I have a problem calculating the cpu time used by a thread on AIX 5.1 and
Linux 2.6 with NPTL. In comp.programming.threads no one had a solution.
For example I want to measure the cpu usage of a single function:
// Illustration of the desired instrumentation: the work is bracketed by a
// named timer, and only the CPU time consumed by the thread running
// process_job() between the two calls should be attributed to "process_job".
// `<do work>` is a placeholder, not compilable code.
void process_job()
{
Timer_Start("process_job");
<do work>
Timer_Stop("process_job");
}
I do not want to measure the cpu usage of the whole process between the
Timer_Start() and Timer_Stop() events. Only the time used in the thread
where process_job() is executed is interesting.
In single threaded applications I can use times() or getrusage() to get the
desired value. Linux running with kernel 2.4 also returns per thread values
from times() and getrusage(). However for kernel 2.6 and also for AIX 5.1
they return the cpu usage of the whole process. I've tried several things:
1. times() and getrusage() return per process cpu usage values, which cannot
be used.
2. With clock_gettime(CLOCK_THREAD_CPUTIME_ID, ..) one can only measure the
realtime elapsed between two timing events. This is shown in the program
below this text.
3. Using pthread_getcpuclockid(pthread_self(),&clock_id); as clockid for
clock_gettime has the same results as point 2.
4. Parsing /proc/self/task/<tid>/stat on Linux gives the desired value, but
this is too expensive, especially when one wants to measure the cpu time
used by small functions. Additionally, the /proc filesystem on AIX does not
seem to provide this information.
I. Is there a faster way to obtain the information stored
in /proc/self/task/<tid>/stat on Linux?
II. Is there at least one way to get the information on AIX 5.1?
Best regards
Christian
extern "C" {
#include <time.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/resource.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>
}
#include <iostream>
/**
 * Convert a POSIX timespec into seconds expressed as a double.
 *
 * @param ts  Time value; tv_sec holds whole seconds and tv_nsec the
 *            nanosecond remainder.
 * @return    tv_sec + tv_nsec * 1e-9.
 */
static double timespec_to_double(const struct timespec &ts)
{
    const double ns_to_sec = 1.0e-9;
    return double(ts.tv_sec) + double(ts.tv_nsec) * ns_to_sec;
}
extern "C" void * execute(void *)
{
___clockid_t_clock_id;
___pthread_getcpuclockid(pthread_self(),
&clock_id);
___struct_timespec_ts;
___//clock_gettime(clock_id,&ts);
___clock_gettime(CLOCK_THREAD_CPUTIME_ID
,_&ts);
___double_cpuTime0=_timespec_to_double(t
s);
___for_(int_i_=_0;i_!=_5;_++i)
___{
______sleep(1);
______//clock_gettime(clock_id,&ts);
______clock_gettime(CLOCK_THREAD_CPUTIME
_ID,_&ts);
______double_cpuTime1=_timespec_to_doubl
e(ts);
______std::cout_<<_"Time:_"_<<_cpuTime1-cpuTime0_<<_std::endl;
___}
___return_0;
}
int main()
{
___pthread_t_tid;
___pthread_create(&tid,_0,_execute,_0);
___pthread_join(tid,_0);
}
[mcr@burns mcr]$ g++ thread.C -pedantic -Wall -W -g -pthread -lrt
[mcr@burns mcr]$ ./a.out
Time: 1.00561
Time: 2.05453
Time: 3.15841
Time: 4.16581
Time: 5.17434
| |
| Christian Panten 2005-02-20, 6:19 pm |
| Hi,
This is the code I used to test the performance of the Linux
proc-filesystem. It shows that there is too much overhead in parsing
the /proc/self/task/<tid>/stat file and that too much work is done in the
kernel.
Some results are:
mcr@preston:~/tmp/threads/clock> g++ diff.C -pthread -O3 -Wall -W
-Wno-long-long
mcr@preston:~/tmp/threads/clock> time ./a.out
CPU Time: 8.58
real 0m17.599s
user 0m1.306s
sys 0m16.251s
Can anyone speed this up?
Best regards
Christian Panten
extern "C" {
#include <time.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/resource.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
#include <asm/unistd.h>
#include <sys/syscall.h>
#include <linux/unistd.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
}
#include <sstream>
#include <iostream>
// Descriptor of the calling thread's /proc stat file; assigned by
// Timer_Init() and read by get_cpuusage().
static int fd;
// Byte offset into the stat file from which get_cpuusage() starts reading;
// computed once by Timer_Init().
static int pos;
_syscall0(pid_t,gettid)
void Timer_Init()
{
std::stringstream ss;
ss << "/proc/self/task/" << gettid() << "/stat";
fd = open(ss.str().c_str(), O_RDONLY);
if (fd < 0) {
std::cerr << "Could not open stat file\n";
exit(-1);
}
char buffer[1000];
pos = read(fd, buffer, 1000);
if (pos < 0) {
std::cerr << "Could not read stat file\n";
exit(-1);
}
while (buffer[--pos] != ')');
pos += 4;
}
/**
 * CPU time (user + system) consumed so far by the thread whose stat file
 * was opened by Timer_Init(), in seconds.
 *
 * Reads the stat file starting at offset `pos`, skips ten space-separated
 * fields and parses the next two as utime and stime (clock ticks).
 *
 * @return Seconds of CPU time, or -1.0 if the file could not be read or
 *         parsed.
 */
double get_cpuusage()
{
    char buffer[1000];
    // One positioned read instead of lseek() + read(); one byte is reserved
    // so the data can be NUL-terminated for sscanf().
    const ssize_t len = pread(fd, buffer, sizeof buffer - 1, pos);
    if (len <= 0)
        return -1.0;
    buffer[len] = '\0';

    // Advance past ten fields; stop at the terminator rather than running
    // off the end of the data that was actually read.
    const char *cursor = buffer;
    for (int skipped = 0; skipped != 10; ++skipped)
    {
        while (*cursor != '\0' && *cursor != ' ')
            ++cursor;
        if (*cursor == '\0')
            return -1.0;
        ++cursor;
    }

    unsigned long long utime = 0;
    unsigned long long stime = 0;
    if (sscanf(cursor, "%llu %llu", &utime, &stime) != 2)
        return -1.0;

    // Ask the system for the tick rate (as get_cpuusage_times() does)
    // instead of assuming 100 ticks per second; fall back to 100 if the
    // query fails.
    static const long queried_ticks = sysconf(_SC_CLK_TCK);
    const double ticks_per_second = queried_ticks > 0 ? double(queried_ticks) : 100.0;
    return static_cast<double>(utime + stime) / ticks_per_second;
}
/**
 * CPU time (user + system) as reported by times(), in seconds.
 *
 * The tick rate is queried from sysconf(_SC_CLK_TCK) on the first call and
 * reused afterwards.
 *
 * @return (tms_utime + tms_stime) divided by the clock ticks per second.
 */
double get_cpuusage_times()
{
    static long long ticks_per_second = sysconf(_SC_CLK_TCK);

    struct tms usage;
    times(&usage);

    const clock_t total_ticks = usage.tms_utime + usage.tms_stime;
    return static_cast<double>(total_ticks) / ticks_per_second;
}
// Running total of the CPU seconds measured across all func() calls.
static double cputime = 0.0;

/**
 * Measured workload: samples get_cpuusage() before and after a tiny empty
 * loop and adds the difference to `cputime`.
 */
void func()
{
    const double started = get_cpuusage();
    for (int spin = 0; spin != 100; ++spin)
    {
        // intentionally empty
    }
    const double finished = get_cpuusage();
    cputime += finished - started;
}
// void func()
// {
// double cpuTime0 = get_cpuusage_times();
// for (int i = 0; i != 100; ++i);
// double cpuTime1 = get_cpuusage_times();
// cputime += cpuTime1 - cpuTime0;
// }
/**
 * Thread entry point of the /proc benchmark.
 *
 * Initialises the timer for this thread, calls func() one million times and
 * prints the accumulated `cputime` as "CPU Time: <seconds>".
 *
 * The argument is unused; the return value is always a null pointer.
 */
extern "C" void *execute(void *)
{
    Timer_Init();

    int remaining = 1000000;
    while (remaining != 0)
    {
        func();
        --remaining;
    }

    std::cout << "CPU Time: " << cputime << std::endl;
    return 0;
}
/**
 * Runs the benchmark in execute() on a single worker thread and waits for
 * it to finish.
 *
 * @return 0 on success, 1 if the thread could not be created or joined.
 */
int main()
{
    pthread_t tid;
    // Joining is only valid if creation succeeded; otherwise tid is unset.
    const int create_rc = pthread_create(&tid, 0, execute, 0);
    if (create_rc != 0) {
        std::cerr << "pthread_create failed: " << create_rc << "\n";
        return 1;
    }
    const int join_rc = pthread_join(tid, 0);
    if (join_rc != 0) {
        std::cerr << "pthread_join failed: " << join_rc << "\n";
        return 1;
    }
    return 0;
}
|
|
|
|
|