|
Home > Archive > Unix Shell > May 2007 > extract lines from file
You are viewing an archived Text-only version of the thread.
To view this thread in its original format and/or if you want to reply to
this thread, please [click here]
| Author |
extract lines from file
|
|
| sonal10july@gmail.com 2007-05-22, 7:26 pm |
| Hi Friends,
I want all those lines from the file starting with the keyword 'INSERT'
and ending with ')'
Note: In the following example there are two ')' (one after 'INSERT' and
another after 'VALUES'). I want all those lines between INSERT and
the second occurrence of ')'
############
Input File
########################################
###################################
INSERT common..dividend
( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
)
VALUES (306181, 51758, '2007-05-11', '2007-05-21',
'2007-05-21', '2007-05-24', 0.19,
'I', 0.7426385949277784, null, 4.818, null,
null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
105,
105, 19874739, null
)
May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
accrueDividends
INFO: START of accrual process for tid: 51745
May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
accrueDividends
INFO: Instrument tid is 51745
INSERT common..dividend (
dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
)
VALUES (306177, 51745, '2007-05-11', '2007-05-21',
'2007-05-21', '2007-05-24', 0.015,
'I', 0.7426385949277784, null, 1.381, null,
null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
105,
105, 19874793, null
)
########################################
###################################
Output should be like following
======================
INSERT common..dividend
( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
)
VALUES (306181, 51758, '2007-05-11', '2007-05-21',
'2007-05-21', '2007-05-24', 0.19,
'I', 0.7426385949277784, null, 4.818, null,
null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
105,
105, 19874739, null
)
INSERT common..dividend
( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
)
VALUES (306181, 51758, '2007-05-11', '2007-05-21',
'2007-05-21', '2007-05-24', 0.19,
'I', 0.7426385949277784, null, 4.818, null,
null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
105,
105, 19874739, null
)
Thank you all in advance for your help.
Regards
SK
| |
| Janis Papanagnou 2007-05-22, 7:26 pm |
| sonal10july@gmail.com wrote:
> Hi Friends,
>
> I want all those lines lines from file starting with keyword 'INSERT'
> and end with ')'
> Note: In following example there are two ')' (one after 'INSERT' and
> another after 'VALUES') . I want all those lines between INSERT and
> second occurence of ')'
It's really hard to decipher your data [ - I hope that someday people
would take more care about how they present their data structures - ].
But you may try whether the following code suffices for your request...
awk '/INSERT/,/)/;/VALUES/,/)/' < inputfile > output_file
Janis
> ############
> Input File
> ########################################
###################################
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: START of accrual process for tid: 51745
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: Instrument tid is 51745
>
> INSERT common..dividend (
>
> dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306177, 51745, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.015,
> 'I', 0.7426385949277784, null, 1.381, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874793, null
> )
>
> ########################################
###################################
>
> Output should be like following
> ======================
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
>
> Thank you all in advance for your help.
>
> Regards
> SK
>
| |
| Michael Tosch 2007-05-22, 7:26 pm |
| sonal10july@gmail.com wrote:
> Hi Friends,
>
> I want all those lines lines from file starting with keyword 'INSERT'
> and end with ')'
> Note: In following example there are two ')' (one after 'INSERT' and
> another after 'VALUES') . I want all those lines between INSERT and
> second occurence of ')'
> ############
> Input File
> ########################################
###################################
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: START of accrual process for tid: 51745
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: Instrument tid is 51745
>
> INSERT common..dividend (
>
> dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306177, 51745, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.015,
> 'I', 0.7426385949277784, null, 1.381, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874793, null
> )
>
> ########################################
###################################
>
> Output should be like following
> ======================
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
>
> Thank you all in advance for your help.
>
> Regards
> SK
>
sed -n "/[^']*INSERT /,/[^']*)/p;/[^']*VALUES /,/[^']*)/p;" file
--
Michael Tosch @ hp : com
| |
| Stephane CHAZELAS 2007-05-22, 7:26 pm |
| 2007-05-22, 21:05(+02), Michael Tosch:
[...]
>
> It is safer to ensure that INSERT and VALUE do not appear inside a 'string':
>
> and an ERE needs \) to match a )
>
> awk "/[^']*INSERT/,/[^']*\)/;/[^']*VALUES/,/[^']*\)/"
[...]
/[^']*INSERT/ is equivalent to /INSERT/
given that [^']* matches the empty string.
Maybe you meant /^[^']*INSERT/
which may miss a "INSERT" that is not inside '...' as in
'...' INSERT
--
Stéphane
| |
| Janis Papanagnou 2007-05-22, 7:26 pm |
| Michael Tosch wrote:
> Janis Papanagnou wrote:
>
>
> It is safer to ensure that INSERT and VALUE do not appear inside a
> 'string':
>
> and an ERE needs \) to match a )
Not in awk. (You haven't tried?)
Janis
>
> awk "/[^']*INSERT/,/[^']*\)/;/[^']*VALUES/,/[^']*\)/"
>
>
| |
|
| sonal10july@gmail.com wrote:
> Hi Friends,
>
> I want all those lines lines from file starting with keyword 'INSERT'
> and end with ')'
> Note: In following example there are two ')' (one after 'INSERT' and
> another after 'VALUES') . I want all those lines between INSERT and
> second occurence of ')'
> ############
> Input File
> ########################################
###################################
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: START of accrual process for tid: 51745
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: Instrument tid is 51745
>
> INSERT common..dividend (
>
> dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306177, 51745, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.015,
> 'I', 0.7426385949277784, null, 1.381, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874793, null
> )
>
> ########################################
###################################
>
> Output should be like following
> ======================
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
>
> Thank you all in advance for your help.
>
> Regards
> SK
>
Presumably something along the lines of:
gawk '{if($0~"INSERT"){t=0;print $0
while(t<2){getline;print $0;if($0~"\)"){t+=1}}}
}' file
Regards,
Geoff
| |
| Stephane CHAZELAS 2007-05-22, 7:26 pm |
| 2007-05-22, 21:16(+02), Janis Papanagnou:
[...]
>
> Not in awk. (You haven't tried?)
[...]
$ awk '/)/'
awk: line 1: regular expression compile failed (missing '(')
Don't assume what is true in gawk is true in every awk
implementation.
POSIX requires the ")" to be escaped here, though it doesn't
require an awk implementation to treat an unmatched ")" as an
error.
--
Stéphane
| |
| Janis Papanagnou 2007-05-22, 7:26 pm |
| Stephane CHAZELAS wrote:
> 2007-05-22, 21:16(+02), Janis Papanagnou:
> [...]
>
>
> [...]
>
> $ awk '/)/'
> awk: line 1: regular expression compile failed (missing '(')
>
> Don't assume what is true in gawk is true in every awk
> implementation.
Neither --posix, --traditional, nor --compat leads to that error in
GNU awk, so I didn't notice. (And that led me to a wrong assumption, my
fault.)
You're right. (And writing it the documented way is always preferable,
anyway.)
Janis
>
> POSIX requires the ")" do be escaped here, though it doesn't
> require an awk implementation to treat un unmatched ")" as an
> error.
>
| |
| Michael Tosch 2007-05-23, 7:16 am |
| Michael Tosch wrote:
....
>
>
> sed -n "/[^']*INSERT /,/[^']*)/p;/[^']*VALUES /,/[^']*)/p;" file
>
>
>
Stephane opened my eyes, this should be:
sed -n "/^[^']*INSERT /,/^[^']*)/p;/^[^']*VALUES /,/^[^']*)/p;" file
--
Michael Tosch @ hp : com
| |
| Ed Morton 2007-05-23, 7:16 am |
| sonal10july@gmail.com wrote:
> Hi Friends,
>
> I want all those lines lines from file starting with keyword 'INSERT'
> and end with ')'
> Note: In following example there are two ')' (one after 'INSERT' and
> another after 'VALUES') . I want all those lines between INSERT and
> second occurence of ')'
> ############
> Input File
> ########################################
###################################
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: START of accrual process for tid: 51745
> May 21, 2007 6:17:11 PM com.esdi.dividend.DividendAccrual
> accrueDividends
> INFO: Instrument tid is 51745
>
> INSERT common..dividend (
>
> dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306177, 51745, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.015,
> 'I', 0.7426385949277784, null, 1.381, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874793, null
> )
>
> ########################################
###################################
>
> Output should be like following
> ======================
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
> INSERT common..dividend
> ( dividend_id,tid,declared_date,ex_date,re
cord_date,pay_date,amount,interim_final_
flag,
>
> fx_rate,stock_option_ratio,stock_price,t
reaty_tax_rate,non_treaty_tax_rate,curre
ncy_code,
>
> create_date,update_date,create_user_id,u
pdate_user_id,cam_id,orig_cam_id
> )
> VALUES (306181, 51758, '2007-05-11', '2007-05-21',
> '2007-05-21', '2007-05-24', 0.19,
> 'I', 0.7426385949277784, null, 4.818, null,
> null, 'EUR', '2007-05-11 03:04', '2007-05-11 03:04',
> 105,
> 105, 19874739, null
> )
>
>
> Thank you all in advance for your help.
>
> Regards
> SK
>
I'd go with:
awk '
$1~/^INSERT$/ { inBlock=1; block="" }
inBlock { block=block $0 ORS; if ($1~/^\)$/) inBlock++ }
inBlock==2 { printf "%s",block; inBlock=0 }
' file
just to keep it clear, simple, and easily extensible.
Ed.
|
|
|
|
|